Index: source/i18n/affixpatternparser.h |
diff --git a/source/i18n/affixpatternparser.h b/source/i18n/affixpatternparser.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..bf6fbe6eec0cbb64bfe48f1b59d45ea492d26d16 |
--- /dev/null |
+++ b/source/i18n/affixpatternparser.h |
@@ -0,0 +1,400 @@ |
+/* |
+******************************************************************************* |
+* Copyright (C) 2015, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+******************************************************************************* |
+* affixpatternparser.h |
+* |
+* created on: 2015jan06 |
+* created by: Travis Keep |
+*/ |
+ |
+#ifndef __AFFIX_PATTERN_PARSER_H__ |
+#define __AFFIX_PATTERN_PARSER_H__ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_FORMATTING |
+ |
+#include "unicode/unistr.h" |
+#include "unicode/uobject.h" |
+#include "pluralaffix.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+class PluralRules; |
+class FixedPrecision; |
+class DecimalFormatSymbols; |
+ |
+/** |
+ * A representation of the various forms of a particular currency according |
+ * to some locale and usage context. |
+ * |
+ * Includes the symbol, ISO code form, and long form(s) of the currency name |
+ * for each plural variation. |
+ */ |
+class U_I18N_API CurrencyAffixInfo : public UMemory { |
+public: |
+ /** |
+ * Creates an instance whose symbol is \u00a4, ISO form is \u00a4\u00a4 |
+ * and long form is \u00a4\u00a4\u00a4. |
+ */ |
+ CurrencyAffixInfo(); |
+ |
+ const UnicodeString &getSymbol() const { return fSymbol; } // symbol form |
+ const UnicodeString &getISO() const { return fISO; } // ISO form |
+ const PluralAffix &getLong() const { return fLong; } // long forms keyed by plural variation |
+ void setSymbol(const UnicodeString &symbol) { |
+ fSymbol = symbol; |
+ fIsDefault = FALSE; // an explicitly set form makes isDefault() return FALSE |
+ } |
+ void setISO(const UnicodeString &iso) { |
+ fISO = iso; |
+ fIsDefault = FALSE; // an explicitly set form makes isDefault() return FALSE |
+ } |
+ UBool |
+ equals(const CurrencyAffixInfo &other) const { // TRUE only if all three forms and the default flag match |
+ return (fSymbol == other.fSymbol) |
+ && (fISO == other.fISO) |
+ && (fLong.equals(other.fLong)) |
+ && (fIsDefault == other.fIsDefault); |
+ } |
+ |
+ /** |
+ * Initializes this instance. |
+ * |
+ * @param locale the locale for the currency forms. |
+ * @param rules The plural rules for the locale. |
+ * @param currency the null terminated, 3 character ISO code of the |
+ * currency. If NULL, resets this instance as if it were just created. |
+ * In this case, the first 2 parameters may be NULL as well. |
+ * @param status any error returned here. |
+ */ |
+ void set( |
+ const char *locale, const PluralRules *rules, |
+ const UChar *currency, UErrorCode &status); |
+ |
+ /** |
+ * Returns TRUE if this instance is the default, that is, it has no real |
+ * currency: for instance, it was never initialized with set(), |
+ * or it was reset with set(NULL, NULL, NULL, status). |
+ */ |
+ UBool isDefault() const { return fIsDefault; } |
+ |
+ /** |
+ * Adjusts the precision used for a particular currency. |
+ * @param currency the null terminated, 3 character ISO code of the |
+ * currency. |
+ * @param usage the usage of the currency |
+ * @param precision min/max fraction digits and rounding increment |
+ * adjusted. |
+ * @param status any error reported here. |
+ */ |
+ static void adjustPrecision( |
+ const UChar *currency, const UCurrencyUsage usage, |
+ FixedPrecision &precision, UErrorCode &status); |
+ |
+private: |
+ /** |
+ * The symbol form of the currency. |
+ */ |
+ UnicodeString fSymbol; |
+ |
+ /** |
+ * The ISO form of the currency, usually three letter abbreviation. |
+ */ |
+ UnicodeString fISO; |
+ |
+ /** |
+ * The long forms of the currency keyed by plural variation. |
+ */ |
+ PluralAffix fLong; |
+ |
+ UBool fIsDefault; // cleared by setSymbol() and setISO(); reported by isDefault() |
+ |
+}; |
+ |
+class AffixPatternIterator; |
+ |
+/** |
+ * A locale agnostic representation of an affix pattern. |
+ */ |
+class U_I18N_API AffixPattern : public UMemory { |
+public: |
+ |
+ /** |
+ * The token types that can appear in an affix pattern. |
+ */ |
+ enum ETokenType { |
+ kLiteral, // literal text; added with addLiteral() rather than add() |
+ kPercent, // percent field |
+ kPerMill, // per-mille field |
+ kCurrency, // currency field; its token length selects symbol, ISO code or long form |
+ kNegative, // minus sign field |
+ kPositive // plus sign field |
+ }; |
+ |
+ /** |
+ * An empty affix pattern. |
+ */ |
+ AffixPattern() |
+ : tokens(), literals(), hasCurrencyToken(FALSE), |
+ hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) { |
+ } |
+ |
+ /** |
+ * Adds a string literal to this affix pattern. |
+ */ |
+ void addLiteral(const UChar *, int32_t start, int32_t len); // NOTE(review): presumably len UChars beginning at index start of the buffer; confirm in the .cpp |
+ |
+ /** |
+ * Adds a token to this affix pattern. t must not be kLiteral as |
+ * the addLiteral() method adds literals. |
+ * @param t the token type to add |
+ */ |
+ void add(ETokenType t); |
+ |
+ /** |
+ * Adds a currency token with specific count to this affix pattern. |
+ * @param count the token count. Used to distinguish between |
+ * one, two, or three currency symbols. Note that adding a currency |
+ * token with count=2 (Use ISO code) is different than adding two |
+ * currency tokens each with count=1 (two currency symbols). |
+ */ |
+ void addCurrency(uint8_t count); |
+ |
+ /** |
+ * Makes this instance be an empty affix pattern. |
+ */ |
+ void remove(); |
+ |
+ /** |
+ * Provides an iterator over the tokens in this instance. |
+ * @param result this is initialized to point just before the |
+ * first token of this instance. Caller must call nextToken() |
+ * on the iterator once it is set up to have it actually point |
+ * to the first token. This first call to nextToken() will return |
+ * FALSE if the AffixPattern being iterated over is empty. |
+ * @return result |
+ */ |
+ AffixPatternIterator &iterator(AffixPatternIterator &result) const; |
+ |
+ /** |
+ * Returns TRUE if this instance has currency tokens in it. |
+ */ |
+ UBool usesCurrency() const { |
+ return hasCurrencyToken; |
+ } |
+ |
+ UBool usesPercent() const { // reports the hasPercentToken flag |
+ return hasPercentToken; |
+ } |
+ |
+ UBool usesPermill() const { // reports the hasPermillToken flag |
+ return hasPermillToken; |
+ } |
+ |
+ /** |
+ * Returns the number of code points a string of this instance |
+ * would have if none of the special tokens were escaped. |
+ * Used to compute the padding size. |
+ */ |
+ int32_t countChar32() const { |
+ return char32Count; |
+ } |
+ |
+ /** |
+ * Appends other to this instance mutating this instance in place. |
+ * @param other The pattern appended to the end of this one. |
+ * @return a reference to this instance for chaining. |
+ */ |
+ AffixPattern &append(const AffixPattern &other); |
+ |
+ /** |
+ * Converts this AffixPattern back into a user string. |
+ * It is the inverse of parseUserAffixString. |
+ */ |
+ UnicodeString &toUserString(UnicodeString &appendTo) const; |
+ |
+ /** |
+ * Converts this AffixPattern back into a string. |
+ * It is the inverse of parseAffixString. |
+ */ |
+ UnicodeString &toString(UnicodeString &appendTo) const; |
+ |
+ /** |
+ * Parses an affix pattern string appending it to an AffixPattern. |
+ * Parses affix pattern strings produced from using |
+ * DecimalFormatPatternParser to parse a format pattern. Affix patterns |
+ * include the positive prefix and suffix and the negative prefix |
+ * and suffix. This method expects affix patterns strings to be in the |
+ * same format that DecimalFormatPatternParser produces. Namely special |
+ * characters in the affix that correspond to a field type must be |
+ * prefixed with an apostrophe ('). These special character sequences |
+ * include minus (-), percent (%), permille (U+2030), plus (+), |
+ * short currency (U+00A4), medium currency (U+00A4 * 2), |
+ * long currency (U+00A4 * 3), and apostrophe (') |
+ * (apostrophe does not correspond to a field type but has to be escaped |
+ * because it itself is the escape character). |
+ * Since the expansion of these special character |
+ * sequences is locale dependent, these sequences are not expanded in |
+ * an AffixPattern instance. |
+ * If these special characters are not prefixed with an apostrophe in |
+ * the affix pattern string, then they are treated verbatim just as |
+ * any other character. If an apostrophe prefixes a non special |
+ * character in the affix pattern, the apostrophe is simply ignored. |
+ * |
+ * @param affixStr the string from DecimalFormatPatternParser |
+ * @param appendTo parsed result appended here. |
+ * @param status any error parsing returned here. |
+ */ |
+ static AffixPattern &parseAffixString( |
+ const UnicodeString &affixStr, |
+ AffixPattern &appendTo, |
+ UErrorCode &status); |
+ |
+ /** |
+ * Parses an affix pattern string appending it to an AffixPattern. |
+ * Parses affix pattern strings as the user would supply them. |
+ * In this function, quoting makes special characters like normal |
+ * characters whereas in parseAffixString, quoting makes special |
+ * characters special. |
+ * |
+ * @param affixStr the string from the user |
+ * @param appendTo parsed result appended here. |
+ * @param status any error parsing returned here. |
+ */ |
+ static AffixPattern &parseUserAffixString( |
+ const UnicodeString &affixStr, |
+ AffixPattern &appendTo, |
+ UErrorCode &status); |
+ |
+ UBool equals(const AffixPattern &other) const { // TRUE only if the token string, literals, flags and code point count all match |
+ return (tokens == other.tokens) |
+ && (literals == other.literals) |
+ && (hasCurrencyToken == other.hasCurrencyToken) |
+ && (hasPercentToken == other.hasPercentToken) |
+ && (hasPermillToken == other.hasPermillToken) |
+ && (char32Count == other.char32Count); |
+ } |
+ |
+private: |
+ /* |
+ * Tokens stored here. Each UChar generally stands for one token. |
+ * Each token is of form 'etttttttllllllll' llllllll is the length of |
+ * the token and ranges from 0-255. ttttttt is the token type and ranges |
+ * from 0-127. If e is set it means this is an extendo token (to be |
+ * described later). To accommodate token lengths above 255, each normal |
+ * token (e=0) can be followed by 0 or more extendo tokens (e=1) with |
+ * the same type. Right now only kLiteral Tokens have extendo tokens. |
+ * Each extendo token provides the next 8 higher bits for the length. |
+ * If a kLiteral token is followed by 2 extendo tokens, then the |
+ * llllllll of the next extendo token contains bits 8-15 of the length |
+ * and the last extendo token contains bits 16-23 of the length. |
+ */ |
+ UnicodeString tokens; |
+ |
+ /* |
+ * The characters of the kLiteral tokens are concatenated together here. |
+ * The first characters go with the first kLiteral token, the next |
+ * characters go with the next kLiteral token etc. |
+ */ |
+ UnicodeString literals; |
+ UBool hasCurrencyToken; // value returned by usesCurrency() |
+ UBool hasPercentToken; // value returned by usesPercent() |
+ UBool hasPermillToken; // value returned by usesPermill() |
+ int32_t char32Count; // value returned by countChar32() |
+ void add(ETokenType t, uint8_t count); // NOTE(review): presumably the shared helper behind add() and addCurrency(); confirm in the .cpp |
+ |
+}; |
+ |
+/** |
+ * An iterator over the tokens in an AffixPattern instance. |
+ */ |
+class U_I18N_API AffixPatternIterator : public UMemory { |
+public: |
+ |
+ /** |
+ * Using an iterator without first calling iterator on an AffixPattern |
+ * instance to initialize the iterator results in |
+ * undefined behavior. |
+ */ |
+ AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { } |
+ /** |
+ * Advances this iterator to the next token. Returns FALSE when there |
+ * are no more tokens. Calling the other methods after nextToken() |
+ * returns FALSE results in undefined behavior. |
+ */ |
+ UBool nextToken(); |
+ |
+ /** |
+ * Returns the type of token. |
+ */ |
+ AffixPattern::ETokenType getTokenType() const; |
+ |
+ /** |
+ * For literal tokens, returns the literal string. Calling this for |
+ * other token types results in undefined behavior. |
+ * @param result replaced with a read-only alias to the literal string. |
+ * @return result |
+ */ |
+ UnicodeString &getLiteral(UnicodeString &result) const; |
+ |
+ /** |
+ * Returns the token length. Usually 1, but for currency tokens may |
+ * be 2 for ISO code and 3 for long form. |
+ */ |
+ int32_t getTokenLength() const; |
+private: |
+ int32_t nextLiteralIndex; // NOTE(review): presumably an offset into *literals; confirm in the .cpp |
+ int32_t lastLiteralLength; // NOTE(review): presumably the length of the literal last stepped over; confirm in the .cpp |
+ int32_t nextTokenIndex; // NOTE(review): presumably an offset into *tokens; confirm in the .cpp |
+ const UnicodeString *tokens; // NULL after default construction; pointee is const, so it is only read through this pointer |
+ const UnicodeString *literals; // NULL after default construction; pointee is const, so it is only read through this pointer |
+ friend class AffixPattern; // gives AffixPattern access to the private fields above |
+ AffixPatternIterator(const AffixPatternIterator &); // copy constructor is declared private |
+ AffixPatternIterator &operator=(const AffixPatternIterator &); // copy assignment is declared private |
+}; |
+ |
+/** |
+ * A locale aware class that converts locale independent AffixPattern |
+ * instances into locale dependent PluralAffix instances. |
+ */ |
+class U_I18N_API AffixPatternParser : public UMemory { |
+public: |
+// NOTE(review): the four strings held below are presumably taken from `symbols`; confirm in the .cpp. |
+AffixPatternParser(); |
+AffixPatternParser(const DecimalFormatSymbols &symbols); |
+void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols); |
+ |
+/** |
+ * Parses affixPattern appending the result to appendTo. |
+ * @param affixPattern The affix pattern. |
+ * @param currencyAffixInfo contains the currency forms. |
+ * @param appendTo The result of parsing affixPattern is appended here. |
+ * @param status any error returned here. |
+ * @return appendTo. |
+ */ |
+PluralAffix &parse( |
+        const AffixPattern &affixPattern, |
+        const CurrencyAffixInfo &currencyAffixInfo, |
+        PluralAffix &appendTo, |
+        UErrorCode &status) const; |
+ |
+/** Returns TRUE if other holds the same four symbol strings as this instance. */ |
+UBool equals(const AffixPatternParser &other) const { |
+    return (fPercent == other.fPercent) && (fPermill == other.fPermill) |
+            && (fNegative == other.fNegative) && (fPositive == other.fPositive); |
+} |
+ |
+private: |
+UnicodeString fPercent;   // NOTE(review): presumably the text substituted for kPercent tokens; confirm |
+UnicodeString fPermill;   // NOTE(review): presumably the text substituted for kPerMill tokens; confirm |
+UnicodeString fNegative;  // NOTE(review): presumably the text substituted for kNegative tokens; confirm |
+UnicodeString fPositive;  // NOTE(review): presumably the text substituted for kPositive tokens; confirm |
+}; |
+ |
+ |
+U_NAMESPACE_END |
+#endif /* #if !UCONFIG_NO_FORMATTING */ |
+#endif // __AFFIX_PATTERN_PARSER_H__ |