Index: source/i18n/affixpatternparser.cpp |
diff --git a/source/i18n/affixpatternparser.cpp b/source/i18n/affixpatternparser.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..41eadef2b3a88a8e22f4b3faa310ce175b1891cc |
--- /dev/null |
+++ b/source/i18n/affixpatternparser.cpp |
@@ -0,0 +1,692 @@ |
+/* |
+ * Copyright (C) 2015, International Business Machines |
+ * Corporation and others. All Rights Reserved. |
+ * |
+ * file name: affixpatternparser.cpp |
+ */ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_FORMATTING |
+ |
+#include "unicode/dcfmtsym.h" |
+#include "unicode/plurrule.h" |
+#include "unicode/ucurr.h" |
+#include "affixpatternparser.h" |
+#include "charstr.h" |
+#include "precision.h" |
+#include "uassert.h" |
+#include "unistrappender.h" |
+ |
+// Placeholder currency text: a run of generic currency signs (U+00A4). |
+// The first 1, 2 and 3 units are used as the default symbol, ISO code and |
+// long name respectively when no real currency has been set. |
+static const UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4}; |
+ |
+// Symbols used by a default-constructed AffixPatternParser. These tables |
+// are only ever read, so they are declared const. |
+static const UChar gPercent = 0x25; |
+static const UChar gPerMill = 0x2030; |
+static const UChar gNegative = 0x2D; |
+static const UChar gPositive = 0x2B; |
+ |
+// Each token is stored in one UChar: the high byte holds the token type |
+// (bit 7 of that byte flags a continuation unit of a literal length) and |
+// the low byte holds a length. Both arguments are parenthesized so that |
+// expressions such as (a | b) expand with the intended precedence. |
+#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | ((l) & 0xFF))) |
+ |
+// Token type stored in the high byte, without the continuation flag. |
+#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F)) |
+ |
+// Non-zero when the unit carries the continuation flag. |
+#define UNPACK_LONG(c) (((c) >> 8) & 0x80) |
+ |
+// Length stored in the low byte. |
+#define UNPACK_LENGTH(c) ((c) & 0xFF) |
+ |
+U_NAMESPACE_BEGIN |
+ |
+// Reads one token of an apostrophe-prefixed affix string starting at |
+// buffer[idx]. Stores the significant character in *token and returns the |
+// number of UChars consumed: |
+//   1 - an ordinary character (or an apostrophe that is the last unit); |
+//   2 - an apostrophe followed by some character other than U+00A4; |
+//   2..4 - an apostrophe followed by a run of one to three U+00A4. |
+static int32_t |
+nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { |
+    const UBool isPrefixed = (buffer[idx] == 0x27) && (idx + 1 != len); |
+    if (!isPrefixed) { |
+        *token = buffer[idx]; |
+        return 1; |
+    } |
+    const UChar marked = buffer[idx + 1]; |
+    *token = marked; |
+    if (marked != 0xA4) { |
+        return 2; |
+    } |
+    // Count the apostrophe plus up to three consecutive currency signs. |
+    int32_t consumed = 2; |
+    while (idx + consumed < len && consumed < 4 && |
+            buffer[idx + consumed] == marked) { |
+        ++consumed; |
+    } |
+    return consumed; |
+} |
+ |
+// Reads one token of a user-format affix string starting at buffer[idx]. |
+// *token receives buffer[idx]; the return value is the number of UChars in |
+// the token. Consecutive apostrophes are grouped up to 2, consecutive |
+// currency signs (U+00A4) up to 3; every other character is a token of 1. |
+static int32_t |
+nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { |
+    const UChar first = buffer[idx]; |
+    *token = first; |
+ |
+    int32_t limit = 1; |
+    if (first == 0x27) { |
+        limit = 2; |
+    } else if (first == 0xA4) { |
+        limit = 3; |
+    } |
+ |
+    int32_t run = 1; |
+    while (idx + run < len && run < limit && buffer[idx + run] == first) { |
+        ++run; |
+    } |
+    return run; |
+} |
+ |
+// Starts out with placeholder currency text: one, two and three generic |
+// currency signs for the symbol, ISO code and long name respectively. |
+CurrencyAffixInfo::CurrencyAffixInfo() |
+    : fSymbol(gDefaultSymbols, 1), |
+      fISO(gDefaultSymbols, 2), |
+      fLong(DigitAffix(gDefaultSymbols, 3)), |
+      fIsDefault(TRUE) |
+{ |
+} |
+ |
+// Loads the symbol, ISO code and per-plural-keyword long names of a |
+// currency. |
+//   locale   - locale passed to the ucurr_ name lookups. |
+//   rules    - supplies the plural keywords to fetch long names for. |
+//   currency - currency code; NULL restores the placeholder values. |
+//   status   - nothing is done if already a failure; set on lookup failure. |
+void |
+CurrencyAffixInfo::set( |
+        const char *locale, |
+        const PluralRules *rules, |
+        const UChar *currency, |
+        UErrorCode &status) { |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+    if (currency == NULL) { |
+        // No currency: go back to the generic currency-sign placeholders. |
+        fSymbol.setTo(gDefaultSymbols, 1); |
+        fISO.setTo(gDefaultSymbols, 2); |
+        fLong.remove(); |
+        fLong.append(gDefaultSymbols, 3); |
+        fIsDefault = TRUE; |
+        return; |
+    } |
+    fIsDefault = FALSE; |
+ |
+    UBool ignoredIsChoice; |
+    int32_t nameLength; |
+    const UChar *symbolName = ucurr_getName( |
+            currency, locale, UCURR_SYMBOL_NAME, &ignoredIsChoice, |
+            &nameLength, &status); |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+    fSymbol.setTo(symbolName, nameLength); |
+    fISO.setTo(currency, u_strlen(currency)); |
+    fLong.remove(); |
+ |
+    StringEnumeration *keywordIter = rules->getKeywords(status); |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+    // One long-name variant per plural keyword reported by the rules. |
+    for (const UnicodeString *keyword = keywordIter->snext(status); |
+            keyword != NULL; |
+            keyword = keywordIter->snext(status)) { |
+        CharString category; |
+        category.appendInvariantChars(*keyword, status); |
+        const UChar *longName = ucurr_getPluralName( |
+                currency, locale, &ignoredIsChoice, category.data(), |
+                &nameLength, &status); |
+        fLong.setVariant( |
+                category.data(), UnicodeString(longName, nameLength), status); |
+    } |
+    delete keywordIter; |
+} |
+ |
+// Configures precision for a currency and usage: both the minimum and the |
+// maximum fraction digit count are set to the currency's default, and the |
+// rounding increment is set (or cleared when the currency has none). |
+// Does nothing if status is already a failure. |
+void |
+CurrencyAffixInfo::adjustPrecision( |
+        const UChar *currency, const UCurrencyUsage usage, |
+        FixedPrecision &precision, UErrorCode &status) { |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+ |
+    const int32_t fractionDigits = ucurr_getDefaultFractionDigitsForUsage( |
+            currency, usage, &status); |
+    precision.fMin.setFracDigitCount(fractionDigits); |
+    precision.fMax.setFracDigitCount(fractionDigits); |
+ |
+    const double roundingStep = ucurr_getRoundingIncrementForUsage( |
+            currency, usage, &status); |
+    if (roundingStep != 0.0) { |
+        precision.fRoundingIncrement.set(roundingStep); |
+        // guard against round-off error |
+        precision.fRoundingIncrement.round(6); |
+        return; |
+    } |
+    precision.fRoundingIncrement.clear(); |
+} |
+ |
+// Appends len UChars of literal text, taken from literal + start, to this |
+// pattern. If the pattern already ends with a literal, the two are merged |
+// by rewriting the trailing literal-length token(s) with the combined |
+// length. The length is stored 8 bits per token, low byte first; every |
+// unit after the first carries the continuation flag (0x80). |
+void |
+AffixPattern::addLiteral( |
+        const UChar *literal, int32_t start, int32_t len) { |
+    char32Count += u_countChar32(literal + start, len); |
+    literals.append(literal, start, len); |
+ |
+    const int32_t oldTokenCount = tokens.length(); |
+    // Takes 4 UChars to encode maximum literal length. |
+    UChar *buf = tokens.getBuffer(oldTokenCount + 4); |
+ |
+    // Walk backwards over any trailing literal-length units, rebuilding the |
+    // existing length from its most significant byte down. runStart stays |
+    // at oldTokenCount when the pattern does not end with a literal. |
+    int32_t runStart = oldTokenCount; |
+    int32_t total = 0; |
+    for (; runStart > 0 && UNPACK_TOKEN(buf[runStart - 1]) == kLiteral; |
+            --runStart) { |
+        total = (total << 8) | UNPACK_LENGTH(buf[runStart - 1]); |
+    } |
+    total += len; |
+ |
+    // Re-encode the combined length in place, starting at runStart. |
+    int32_t out = runStart; |
+    buf[out++] = PACK_TOKEN_AND_LENGTH(kLiteral, total & 0xFF); |
+    for (total >>= 8; total != 0; total >>= 8) { |
+        buf[out++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, total & 0xFF); |
+    } |
+    tokens.releaseBuffer(out); |
+} |
+ |
+// Adds a single non-literal token of type t with a length of 1. |
+void |
+AffixPattern::add(ETokenType t) { |
+    const uint8_t single = 1; |
+    add(t, single); |
+} |
+ |
+// Adds a currency token; count is the number of currency signs it stands |
+// for. |
+void |
+AffixPattern::addCurrency(uint8_t count) { |
+    const ETokenType type = kCurrency; |
+    add(type, count); |
+} |
+ |
+// Adds a non-literal token of type t with the given count: bumps the code |
+// point total by count, records whether a currency, percent or per-mill |
+// token has been seen, and appends the packed token. |
+// Literals must be added with addLiteral() instead. |
+void |
+AffixPattern::add(ETokenType t, uint8_t count) { |
+    U_ASSERT(t != kLiteral); |
+    char32Count += count; |
+    if (t == kCurrency) { |
+        hasCurrencyToken = TRUE; |
+    } else if (t == kPercent) { |
+        hasPercentToken = TRUE; |
+    } else if (t == kPerMill) { |
+        hasPermillToken = TRUE; |
+    } |
+    // Other token types set no flag. |
+    tokens.append(PACK_TOKEN_AND_LENGTH(t, count)); |
+} |
+ |
+// Appends every token of other to this pattern and returns *this. |
+// Literals go through addLiteral(), currency tokens keep their length, |
+// and every other token type is added with a length of 1. |
+AffixPattern & |
+AffixPattern::append(const AffixPattern &other) { |
+    AffixPatternIterator iter; |
+    other.iterator(iter); |
+    UnicodeString text; |
+    while (iter.nextToken()) { |
+        const ETokenType type = iter.getTokenType(); |
+        if (type == kLiteral) { |
+            iter.getLiteral(text); |
+            addLiteral(text.getBuffer(), 0, text.length()); |
+        } else if (type == kCurrency) { |
+            addCurrency(iter.getTokenLength()); |
+        } else { |
+            add(type); |
+        } |
+    } |
+    return *this; |
+} |
+ |
+// Resets this pattern to empty: no tokens, no literal text, a code point |
+// count of zero and all token-presence flags cleared. |
+void |
+AffixPattern::remove() { |
+    char32Count = 0; |
+    hasPermillToken = FALSE; |
+    hasPercentToken = FALSE; |
+    hasCurrencyToken = FALSE; |
+    literals.remove(); |
+    tokens.remove(); |
+} |
+ |
+// Writes literal to appender for the string form in which special |
+// characters are NOT escaped: only an apostrophe needs treatment, and it |
+// is written doubled. |
+static void escapeApostropheInLiteral( |
+        const UnicodeString &literal, UnicodeStringAppender &appender) { |
+    const int32_t count = literal.length(); |
+    const UChar *chars = literal.getBuffer(); |
+    for (int32_t pos = 0; pos < count; ++pos) { |
+        const UChar ch = chars[pos]; |
+        if (ch == 0x27) { |
+            // First half of the doubled apostrophe. |
+            appender.append(ch); |
+        } |
+        appender.append(ch); |
+    } |
+} |
+ |
+ |
+// Writes literal to appender for the user string form, where special |
+// characters inside literals are escaped with apostrophes: an apostrophe |
+// is doubled, and each of % (U+0025), per-mill (U+2030), currency sign |
+// (U+00A4), - (U+002D) and + (U+002B) is wrapped in a pair of apostrophes. |
+static void escapeLiteral( |
+        const UnicodeString &literal, UnicodeStringAppender &appender) { |
+    const int32_t count = literal.length(); |
+    const UChar *chars = literal.getBuffer(); |
+    for (int32_t pos = 0; pos < count; ++pos) { |
+        const UChar ch = chars[pos]; |
+        switch (ch) { |
+        case 0x27: |
+            appender.append(ch); |
+            appender.append(ch); |
+            break; |
+        case 0x25: |
+        case 0x2030: |
+        case 0xA4: |
+        case 0x2D: |
+        case 0x2B: |
+            // All special symbols share one quoting scheme. |
+            appender.append((UChar) 0x27); |
+            appender.append(ch); |
+            appender.append((UChar) 0x27); |
+            break; |
+        default: |
+            appender.append(ch); |
+            break; |
+        } |
+    } |
+} |
+ |
+// Appends the apostrophe-prefixed string form of this pattern to appendTo |
+// and returns appendTo. Each special token is written as an apostrophe |
+// followed by its symbol (a currency token writes one apostrophe followed |
+// by as many currency signs as its length); literals are written with |
+// apostrophes doubled. |
+UnicodeString & |
+AffixPattern::toString(UnicodeString &appendTo) const { |
+    AffixPatternIterator iter; |
+    iterator(iter); |
+    UnicodeStringAppender appender(appendTo); |
+    UnicodeString text; |
+    while (iter.nextToken()) { |
+        const ETokenType type = iter.getTokenType(); |
+        if (type == kLiteral) { |
+            escapeApostropheInLiteral(iter.getLiteral(text), appender); |
+            continue; |
+        } |
+        UChar symbol = 0; |
+        int32_t repeat = 1; |
+        UBool recognized = TRUE; |
+        switch (type) { |
+        case kPercent: |
+            symbol = 0x25; |
+            break; |
+        case kPerMill: |
+            symbol = 0x2030; |
+            break; |
+        case kCurrency: |
+            symbol = 0xA4; |
+            repeat = iter.getTokenLength(); |
+            break; |
+        case kNegative: |
+            symbol = 0x2D; |
+            break; |
+        case kPositive: |
+            symbol = 0x2B; |
+            break; |
+        default: |
+            U_ASSERT(FALSE); |
+            recognized = FALSE; |
+            break; |
+        } |
+        if (!recognized) { |
+            continue; |
+        } |
+        appender.append((UChar) 0x27); |
+        for (int32_t k = 0; k < repeat; ++k) { |
+            appender.append(symbol); |
+        } |
+    } |
+    return appendTo; |
+} |
+ |
+// Appends the user string form of this pattern to appendTo and returns |
+// appendTo. Special tokens are written as their bare symbol (a currency |
+// token as many currency signs as its length); literals are written |
+// through escapeLiteral() so special characters in them are quoted. |
+UnicodeString & |
+AffixPattern::toUserString(UnicodeString &appendTo) const { |
+    AffixPatternIterator iter; |
+    iterator(iter); |
+    UnicodeStringAppender appender(appendTo); |
+    UnicodeString text; |
+    while (iter.nextToken()) { |
+        const ETokenType type = iter.getTokenType(); |
+        if (type == kLiteral) { |
+            escapeLiteral(iter.getLiteral(text), appender); |
+            continue; |
+        } |
+        UChar symbol = 0; |
+        int32_t repeat = 1; |
+        switch (type) { |
+        case kPercent: |
+            symbol = 0x25; |
+            break; |
+        case kPerMill: |
+            symbol = 0x2030; |
+            break; |
+        case kCurrency: |
+            symbol = 0xA4; |
+            repeat = iter.getTokenLength(); |
+            break; |
+        case kNegative: |
+            symbol = 0x2D; |
+            break; |
+        case kPositive: |
+            symbol = 0x2B; |
+            break; |
+        default: |
+            U_ASSERT(FALSE); |
+            // Unknown token types produce no output. |
+            repeat = 0; |
+            break; |
+        } |
+        for (int32_t k = 0; k < repeat; ++k) { |
+            appender.append(symbol); |
+        } |
+    } |
+    return appendTo; |
+} |
+ |
+// Collects literal code units in a small fixed buffer and hands them to an |
+// AffixPattern in batches through addLiteral(). Anything still buffered is |
+// flushed when the appender is destroyed. |
+class AffixPatternAppender : public UMemory { |
+public: |
+    explicit AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { } |
+ |
+    // Appends one UTF-16 code unit, flushing first if the buffer is full. |
+    inline void append(UChar x) { |
+        if (fIdx == UPRV_LENGTHOF(fBuffer)) { |
+            // addLiteral() counts code points separately for each call, so |
+            // a surrogate pair split across two calls would be counted as |
+            // two code points. If the buffer ends in a lead surrogate, hold |
+            // that unit back and carry it over to the next batch. |
+            int32_t held = U16_IS_LEAD(fBuffer[fIdx - 1]) ? 1 : 0; |
+            fDest->addLiteral(fBuffer, 0, fIdx - held); |
+            if (held) { |
+                fBuffer[0] = fBuffer[fIdx - 1]; |
+            } |
+            fIdx = held; |
+        } |
+        fBuffer[fIdx++] = x; |
+    } |
+ |
+    // Appends one code point; flushes first unless at least two units of |
+    // buffer space remain, so a surrogate pair always fits in one batch. |
+    inline void append(UChar32 x) { |
+        if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) { |
+            fDest->addLiteral(fBuffer, 0, fIdx); |
+            fIdx = 0; |
+        } |
+        U16_APPEND_UNSAFE(fBuffer, fIdx, x); |
+    } |
+ |
+    // Hands any buffered units to the destination pattern and empties the |
+    // buffer. |
+    inline void flush() { |
+        if (fIdx) { |
+            fDest->addLiteral(fBuffer, 0, fIdx); |
+        } |
+        fIdx = 0; |
+    } |
+ |
+    /** |
+     * flush the buffer when we go out of scope. |
+     */ |
+    ~AffixPatternAppender() { |
+        flush(); |
+    } |
+private: |
+    AffixPattern *fDest; |
+    int32_t fIdx; |
+    UChar fBuffer[32]; |
+    // Copying is not supported: declared but not defined. |
+    AffixPatternAppender(const AffixPatternAppender &other); |
+    AffixPatternAppender &operator=(const AffixPatternAppender &other); |
+}; |
+ |
+ |
+// Parses a user-format affix string and appends the result to appendTo, |
+// which is also the return value. Outside quotes, %, per-mill (U+2030), |
+// -, + and runs of up to three currency signs (U+00A4) become special |
+// tokens; everything else is literal text. A lone apostrophe toggles |
+// quoting, and two apostrophes in a row stand for one literal apostrophe. |
+// Does nothing if status is already a failure. |
+AffixPattern & |
+AffixPattern::parseUserAffixString( |
+        const UnicodeString &affixStr, |
+        AffixPattern &appendTo, |
+        UErrorCode &status) { |
+    if (U_FAILURE(status)) { |
+        return appendTo; |
+    } |
+    const int32_t len = affixStr.length(); |
+    const UChar *buffer = affixStr.getBuffer(); |
+    UBool quoted = FALSE; |
+    AffixPatternAppender appender(appendTo); |
+    int32_t pos = 0; |
+    while (pos < len) { |
+        UChar token; |
+        const int32_t tokenSize = nextUserToken(buffer, pos, len, &token); |
+        pos += tokenSize; |
+ |
+        if (token == 0x27 && tokenSize == 1) { |
+            // A single apostrophe opens or closes a quoted section. |
+            quoted = !quoted; |
+            continue; |
+        } |
+ |
+        if (quoted) { |
+            // Inside quotes everything is literal. Only a currency token |
+            // can stand for several characters; a doubled apostrophe |
+            // yields a single one. |
+            const int32_t repeat = (token == 0xA4) ? tokenSize : 1; |
+            for (int32_t j = 0; j < repeat; ++j) { |
+                appender.append(token); |
+            } |
+            continue; |
+        } |
+ |
+        ETokenType type = kLiteral; |
+        switch (token) { |
+        case 0x25: |
+            type = kPercent; |
+            break; |
+        case 0x2030: |
+            type = kPerMill; |
+            break; |
+        case 0x2D: |
+            type = kNegative; |
+            break; |
+        case 0x2B: |
+            type = kPositive; |
+            break; |
+        case 0xA4: |
+            type = kCurrency; |
+            break; |
+        default: |
+            // Includes the doubled apostrophe, whose token is 0x27. |
+            break; |
+        } |
+        if (type == kLiteral) { |
+            appender.append(token); |
+        } else { |
+            // Pending literal text must reach the pattern before the token. |
+            appender.flush(); |
+            const uint8_t count = |
+                    (type == kCurrency) ? (uint8_t) tokenSize : (uint8_t) 1; |
+            appendTo.add(type, count); |
+        } |
+    } |
+    return appendTo; |
+} |
+ |
+// Parses an apostrophe-prefixed affix string (the form read by |
+// nextToken(), where each special symbol follows an apostrophe) and |
+// appends the result to appendTo, which is also the return value. Runs of |
+// ordinary characters become literals. Does nothing if status is already |
+// a failure; sets U_PARSE_ERROR for a currency run longer than three. |
+AffixPattern & |
+AffixPattern::parseAffixString( |
+        const UnicodeString &affixStr, |
+        AffixPattern &appendTo, |
+        UErrorCode &status) { |
+    if (U_FAILURE(status)) { |
+        return appendTo; |
+    } |
+    const int32_t len = affixStr.length(); |
+    const UChar *buffer = affixStr.getBuffer(); |
+    int32_t pos = 0; |
+    while (pos < len) { |
+        UChar token; |
+        int32_t tokenSize = nextToken(buffer, pos, len, &token); |
+        if (tokenSize == 1) { |
+            // Gather the whole run of single-unit tokens into one literal. |
+            const int32_t runStart = pos; |
+            ++pos; |
+            while (pos < len) { |
+                tokenSize = nextToken(buffer, pos, len, &token); |
+                if (tokenSize != 1) { |
+                    break; |
+                } |
+                ++pos; |
+            } |
+            appendTo.addLiteral(buffer, runStart, pos - runStart); |
+ |
+            // If we reached end of string, we are done |
+            if (pos == len) { |
+                return appendTo; |
+            } |
+        } |
+        // token/tokenSize now describe an apostrophe-prefixed token. |
+        pos += tokenSize; |
+ |
+        ETokenType type = kLiteral; |
+        switch (token) { |
+        case 0x25: |
+            type = kPercent; |
+            break; |
+        case 0x2030: |
+            type = kPerMill; |
+            break; |
+        case 0x2D: |
+            type = kNegative; |
+            break; |
+        case 0x2B: |
+            type = kPositive; |
+            break; |
+        case 0xA4: |
+            type = kCurrency; |
+            break; |
+        default: |
+            break; |
+        } |
+        if (type == kLiteral) { |
+            // An apostrophe followed by an ordinary character yields that |
+            // character as literal text. |
+            appendTo.addLiteral(&token, 0, 1); |
+        } else if (type == kCurrency) { |
+            // tokenSize includes the leading apostrophe. |
+            if (tokenSize - 1 > 3) { |
+                status = U_PARSE_ERROR; |
+                return appendTo; |
+            } |
+            appendTo.add(kCurrency, tokenSize - 1); |
+        } else { |
+            appendTo.add(type, 1); |
+        } |
+    } |
+    return appendTo; |
+} |
+ |
+// Points result at this pattern's tokens and literals and rewinds it to |
+// the start. Returns result. |
+AffixPatternIterator & |
+AffixPattern::iterator(AffixPatternIterator &result) const { |
+    result.tokens = &tokens; |
+    result.literals = &literals; |
+    result.nextTokenIndex = 0; |
+    result.nextLiteralIndex = 0; |
+    result.lastLiteralLength = 0; |
+    return result; |
+} |
+ |
+// Advances to the next token. Returns FALSE when no tokens remain. |
+// For a literal token, all of its length units are consumed, the decoded |
+// length is stored in lastLiteralLength and nextLiteralIndex is advanced |
+// past the literal's text. |
+UBool |
+AffixPatternIterator::nextToken() { |
+    const int32_t tokenCount = tokens->length(); |
+    if (nextTokenIndex == tokenCount) { |
+        return FALSE; |
+    } |
+    const UChar *buf = tokens->getBuffer(); |
+    const UChar head = buf[nextTokenIndex++]; |
+    if (UNPACK_TOKEN(head) != AffixPattern::kLiteral) { |
+        return TRUE; |
+    } |
+ |
+    // Skip the continuation units that follow the first length unit. |
+    while (nextTokenIndex < tokenCount && |
+            UNPACK_LONG(buf[nextTokenIndex])) { |
+        ++nextTokenIndex; |
+    } |
+ |
+    // Units are stored low byte first, so decode from the last unit back |
+    // to the first one, which is the unit without the continuation flag. |
+    lastLiteralLength = 0; |
+    int32_t pos = nextTokenIndex - 1; |
+    while (UNPACK_LONG(buf[pos])) { |
+        lastLiteralLength = (lastLiteralLength << 8) | UNPACK_LENGTH(buf[pos]); |
+        --pos; |
+    } |
+    lastLiteralLength = (lastLiteralLength << 8) | UNPACK_LENGTH(buf[pos]); |
+    nextLiteralIndex += lastLiteralLength; |
+    return TRUE; |
+} |
+ |
+// Returns the type of the token most recently reached by nextToken(). |
+AffixPattern::ETokenType |
+AffixPatternIterator::getTokenType() const { |
+    const UChar packed = tokens->charAt(nextTokenIndex - 1); |
+    return UNPACK_TOKEN(packed); |
+} |
+ |
+// Sets result to the text of the most recently decoded literal, i.e. the |
+// lastLiteralLength units that end at nextLiteralIndex. Returns result. |
+UnicodeString & |
+AffixPatternIterator::getLiteral(UnicodeString &result) const { |
+    const int32_t begin = nextLiteralIndex - lastLiteralLength; |
+    result.setTo(literals->getBuffer() + begin, lastLiteralLength); |
+    return result; |
+} |
+ |
+// Returns the length of the current token: the decoded literal length |
+// for a literal, otherwise the length stored in the token's low byte. |
+int32_t |
+AffixPatternIterator::getTokenLength() const { |
+    const UChar packed = tokens->getBuffer()[nextTokenIndex - 1]; |
+    if (UNPACK_TOKEN(packed) == AffixPattern::kLiteral) { |
+        return lastLiteralLength; |
+    } |
+    return UNPACK_LENGTH(packed); |
+} |
+ |
+// Uses the built-in symbols: % (U+0025), per-mill (U+2030), - (U+002D) |
+// and + (U+002B). |
+AffixPatternParser::AffixPatternParser() |
+    : fPercent(gPercent), |
+      fPermill(gPerMill), |
+      fNegative(gNegative), |
+      fPositive(gPositive) |
+{ |
+} |
+ |
+// Takes the percent, per-mill, minus and plus symbols from symbols. |
+AffixPatternParser::AffixPatternParser( |
+        const DecimalFormatSymbols &symbols) |
+{ |
+    setDecimalFormatSymbols(symbols); |
+} |
+ |
+// Replaces the four symbols this parser substitutes for special tokens |
+// with the percent, per-mill, minus-sign and plus-sign symbols of symbols. |
+void |
+AffixPatternParser::setDecimalFormatSymbols( |
+        const DecimalFormatSymbols &symbols) { |
+    fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); |
+    fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); |
+    fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); |
+    fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); |
+} |
+ |
+// Expands affixPattern into affix text appended to appendTo, which is |
+// also the return value. |
+//   affixPattern      - the tokens to expand. |
+//   currencyAffixInfo - supplies the symbol, ISO code and long name used |
+//                       for currency tokens of length 1, 2 and 3. |
+//   appendTo          - receives the text; percent, per-mill, sign and |
+//                       currency pieces are tagged with their field id. |
+//   status            - nothing is done if already a failure. |
+PluralAffix & |
+AffixPatternParser::parse( |
+        const AffixPattern &affixPattern, |
+        const CurrencyAffixInfo &currencyAffixInfo, |
+        PluralAffix &appendTo, |
+        UErrorCode &status) const { |
+    if (U_FAILURE(status)) { |
+        return appendTo; |
+    } |
+    AffixPatternIterator iter; |
+    affixPattern.iterator(iter); |
+    UnicodeString literal; |
+    while (iter.nextToken()) { |
+        switch (iter.getTokenType()) { |
+        case AffixPattern::kPercent: |
+            appendTo.append(fPercent, UNUM_PERCENT_FIELD); |
+            break; |
+        case AffixPattern::kPerMill: |
+            appendTo.append(fPermill, UNUM_PERMILL_FIELD); |
+            break; |
+        case AffixPattern::kNegative: |
+            appendTo.append(fNegative, UNUM_SIGN_FIELD); |
+            break; |
+        case AffixPattern::kPositive: |
+            appendTo.append(fPositive, UNUM_SIGN_FIELD); |
+            break; |
+        case AffixPattern::kCurrency: |
+            // The token length selects which currency text is used. |
+            switch (iter.getTokenLength()) { |
+            case 1: |
+                appendTo.append( |
+                        currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD); |
+                break; |
+            case 2: |
+                appendTo.append( |
+                        currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD); |
+                break; |
+            case 3: |
+                appendTo.append( |
+                        currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status); |
+                break; |
+            default: |
+                U_ASSERT(FALSE); |
+                break; |
+            } |
+            break; |
+        case AffixPattern::kLiteral: |
+            appendTo.append(iter.getLiteral(literal)); |
+            break; |
+        default: |
+            U_ASSERT(FALSE); |
+            break; |
+        } |
+    } |
+    return appendTo; |
+} |
+ |
+ |
+U_NAMESPACE_END |
+#endif /* #if !UCONFIG_NO_FORMATTING */ |