OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2015, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * affixpatternparser.h |
| 7 * |
| 8 * created on: 2015jan06 |
| 9 * created by: Travis Keep |
| 10 */ |
| 11 |
| 12 #ifndef __AFFIX_PATTERN_PARSER_H__ |
| 13 #define __AFFIX_PATTERN_PARSER_H__ |
| 14 |
| 15 #include "unicode/utypes.h" |
| 16 |
| 17 #if !UCONFIG_NO_FORMATTING |
| 18 |
| 19 #include "unicode/unistr.h" |
| 20 #include "unicode/uobject.h" |
| 21 #include "pluralaffix.h" |
| 22 |
| 23 U_NAMESPACE_BEGIN |
| 24 |
| 25 class PluralRules; |
| 26 class FixedPrecision; |
| 27 class DecimalFormatSymbols; |
| 28 |
| 29 /** |
| 30 * A representation of the various forms of a particular currency according |
| 31 * to some locale and usage context. |
| 32 * |
| 33 * Includes the symbol, ISO code form, and long form(s) of the currency name |
| 34 * for each plural variation. |
| 35 */ |
| 36 class U_I18N_API CurrencyAffixInfo : public UMemory { |
| 37 public: |
| 38 /** |
| 39 * Symbol is \u00a4; ISO form is \u00a4\u00a4; |
| 40 * long form is \u00a4\u00a4\u00a4. |
| 41 */ |
| 42 CurrencyAffixInfo(); |
| 43 /** Read-only accessors; the setters below also clear the default flag. */ |
| 44 const UnicodeString &getSymbol() const { return fSymbol; } |
| 45 const UnicodeString &getISO() const { return fISO; } |
| 46 const PluralAffix &getLong() const { return fLong; } |
| 47 void setSymbol(const UnicodeString &symbol) { |
| 48 fSymbol = symbol; |
| 49 fIsDefault = FALSE; |
| 50 } |
| 51 void setISO(const UnicodeString &iso) { |
| 52 fISO = iso; |
| 53 fIsDefault = FALSE; |
| 54 } |
| 55 /** Field-wise equality: symbol, ISO form, long forms and default flag. */ UBool |
| 56 equals(const CurrencyAffixInfo &other) const { |
| 57 return (fSymbol == other.fSymbol) |
| 58 && (fISO == other.fISO) |
| 59 && (fLong.equals(other.fLong)) |
| 60 && (fIsDefault == other.fIsDefault); |
| 61 } |
| 62 |
| 63 /** |
| 64 * Initializes this instance. |
| 65 * |
| 66 * @param locale the locale for the currency forms. |
| 67 * @param rules The plural rules for the locale. |
| 68 * @param currency the null terminated, 3 character ISO code of the |
| 69 * currency. If NULL, resets this instance as if it were just created. |
| 70 * In this case, the first 2 parameters may be NULL as well. |
| 71 * @param status any error returned here. |
| 72 */ |
| 73 void set( |
| 74 const char *locale, const PluralRules *rules, |
| 75 const UChar *currency, UErrorCode &status); |
| 76 |
| 77 /** |
| 78 * Returns true if this instance is the default, that is, it has no real |
| 79 * currency: for instance, it was never initialized with set() |
| 80 * or it was reset with set(NULL, NULL, NULL, status). |
| 81 */ |
| 82 UBool isDefault() const { return fIsDefault; } |
| 83 |
| 84 /** |
| 85 * Adjusts the precision used for a particular currency. |
| 86 * @param currency the null terminated, 3 character ISO code of the |
| 87 * currency. |
| 88 * @param usage the usage of the currency |
| 89 * @param precision min/max fraction digits and rounding increment |
| 90 * adjusted. |
| 91 * @param status any error reported here. |
| 92 */ |
| 93 static void adjustPrecision( |
| 94 const UChar *currency, const UCurrencyUsage usage, |
| 95 FixedPrecision &precision, UErrorCode &status); |
| 96 |
| 97 private: |
| 98 /** |
| 99 * The symbol form of the currency. |
| 100 */ |
| 101 UnicodeString fSymbol; |
| 102 |
| 103 /** |
| 104 * The ISO form of the currency, usually three letter abbreviation. |
| 105 */ |
| 106 UnicodeString fISO; |
| 107 |
| 108 /** |
| 109 * The long forms of the currency keyed by plural variation. |
| 110 */ |
| 111 PluralAffix fLong; |
| 112 /** Default flag reported by isDefault(); cleared by setSymbol() and setISO(). */ |
| 113 UBool fIsDefault; |
| 114 |
| 115 }; |
| 116 |
| 117 class AffixPatternIterator; |
| 118 |
| 119 /** |
| 120 * A locale agnostic representation of an affix pattern. |
| 121 */ |
| 122 class U_I18N_API AffixPattern : public UMemory { |
| 123 public: |
| 124 |
| 125 /** |
| 126 * The token types that can appear in an affix pattern. |
| 127 */ |
| 128 enum ETokenType { |
| 129 kLiteral, |
| 130 kPercent, |
| 131 kPerMill, |
| 132 kCurrency, |
| 133 kNegative, |
| 134 kPositive |
| 135 }; |
| 136 |
| 137 /** |
| 138 * An empty affix pattern. |
| 139 */ |
| 140 AffixPattern() |
| 141 : tokens(), literals(), hasCurrencyToken(FALSE), |
| 142 hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) { |
| 143 } |
| 144 |
| 145 /** |
| 146 * Adds a string literal to this affix pattern. |
| 147 */ |
| 148 void addLiteral(const UChar *, int32_t start, int32_t len); |
| 149 |
| 150 /** |
| 151 * Adds a token to this affix pattern. t must not be kLiteral as |
| 152 * the addLiteral() method adds literals. |
| 153 * @param t the token type to add |
| 154 */ |
| 155 void add(ETokenType t); |
| 156 |
| 157 /** |
| 158 * Adds a currency token with specific count to this affix pattern. |
| 159 * @param count the token count. Used to distinguish between |
| 160 * one, two, or three currency symbols. Note that adding a currency |
| 161 * token with count=2 (Use ISO code) is different than adding two |
| 162 * currency tokens each with count=1 (two currency symbols). |
| 163 */ |
| 164 void addCurrency(uint8_t count); |
| 165 |
| 166 /** |
| 167 * Makes this instance be an empty affix pattern. |
| 168 */ |
| 169 void remove(); |
| 170 |
| 171 /** |
| 172 * Provides an iterator over the tokens in this instance. |
| 173 * @param result this is initialized to point just before the |
| 174 * first token of this instance. Caller must call nextToken() |
| 175 * on the iterator once it is set up to have it actually point |
| 176 * to the first token. This first call to nextToken() will return |
| 177 * FALSE if the AffixPattern being iterated over is empty. |
| 178 * @return result |
| 179 */ |
| 180 AffixPatternIterator &iterator(AffixPatternIterator &result) const; |
| 181 |
| 182 /** |
| 183 * Returns TRUE if this instance has currency tokens in it. |
| 184 */ |
| 185 UBool usesCurrency() const { |
| 186 return hasCurrencyToken; |
| 187 } |
| 188 /** Returns the percent-token flag (hasPercentToken). */ |
| 189 UBool usesPercent() const { |
| 190 return hasPercentToken; |
| 191 } |
| 192 /** Returns the permill-token flag (hasPermillToken). */ |
| 193 UBool usesPermill() const { |
| 194 return hasPermillToken; |
| 195 } |
| 196 |
| 197 /** |
| 198 * Returns the number of code points a string of this instance |
| 199 * would have if none of the special tokens were escaped. |
| 200 * Used to compute the padding size. |
| 201 */ |
| 202 int32_t countChar32() const { |
| 203 return char32Count; |
| 204 } |
| 205 |
| 206 /** |
| 207 * Appends other to this instance mutating this instance in place. |
| 208 * @param other The pattern appended to the end of this one. |
| 209 * @return a reference to this instance for chaining. |
| 210 */ |
| 211 AffixPattern &append(const AffixPattern &other); |
| 212 |
| 213 /** |
| 214 * Converts this AffixPattern back into a user string. |
| 215 * It is the inverse of parseUserAffixString. |
| 216 */ |
| 217 UnicodeString &toUserString(UnicodeString &appendTo) const; |
| 218 |
| 219 /** |
| 220 * Converts this AffixPattern back into a string. |
| 221 * It is the inverse of parseAffixString. |
| 222 */ |
| 223 UnicodeString &toString(UnicodeString &appendTo) const; |
| 224 |
| 225 /** |
| 226 * Parses an affix pattern string appending it to an AffixPattern. |
| 227 * Parses affix pattern strings produced from using |
| 228 * DecimalFormatPatternParser to parse a format pattern. Affix patterns |
| 229 * include the positive prefix and suffix and the negative prefix |
| 230 * and suffix. This method expects affix patterns strings to be in the |
| 231 * same format that DecimalFormatPatternParser produces. Namely special |
| 232 * characters in the affix that correspond to a field type must be |
| 233 * prefixed with an apostrophe ('). These special character sequences |
| 234 * include minus (-), percent (%), permille (U+2030), plus (+), |
| 235 * short currency (U+00a4), medium currency (U+00a4 * 2), |
| 236 * long currency (U+00a4 * 3), and apostrophe (') |
| 237 * (apostrophe does not correspond to a field type but has to be escaped |
| 238 * because it itself is the escape character). |
| 239 * Since the expansion of these special character |
| 240 * sequences is locale dependent, these sequences are not expanded in |
| 241 * an AffixPattern instance. |
| 242 * If these special characters are not prefixed with an apostrophe in |
| 243 * the affix pattern string, then they are treated verbatim just as |
| 244 * any other character. If an apostrophe prefixes a non special |
| 245 * character in the affix pattern, the apostrophe is simply ignored. |
| 246 * |
| 247 * @param affixStr the string from DecimalFormatPatternParser |
| 248 * @param appendTo parsed result appended here. |
| 249 * @param status any error parsing returned here. |
| 250 */ |
| 251 static AffixPattern &parseAffixString( |
| 252 const UnicodeString &affixStr, |
| 253 AffixPattern &appendTo, |
| 254 UErrorCode &status); |
| 255 |
| 256 /** |
| 257 * Parses an affix pattern string appending it to an AffixPattern. |
| 258 * Parses affix pattern strings as the user would supply them. |
| 259 * In this function, quoting makes special characters like normal |
| 260 * characters whereas in parseAffixString, quoting makes special |
| 261 * characters special. |
| 262 * |
| 263 * @param affixStr the string from the user |
| 264 * @param appendTo parsed result appended here. |
| 265 * @param status any error parsing returned here. |
| 266 */ |
| 267 static AffixPattern &parseUserAffixString( |
| 268 const UnicodeString &affixStr, |
| 269 AffixPattern &appendTo, |
| 270 UErrorCode &status); |
| 271 /** Compares the token and literal strings, the three token flags and the code point count. */ |
| 272 UBool equals(const AffixPattern &other) const { |
| 273 return (tokens == other.tokens) |
| 274 && (literals == other.literals) |
| 275 && (hasCurrencyToken == other.hasCurrencyToken) |
| 276 && (hasPercentToken == other.hasPercentToken) |
| 277 && (hasPermillToken == other.hasPermillToken) |
| 278 && (char32Count == other.char32Count); |
| 279 } |
| 280 |
| 281 private: |
| 282 /* |
| 283 * Tokens stored here. Each UChar generally stands for one token. |
| 284 * Each token is of form 'etttttttllllllll' llllllll is the length of |
| 285 * the token and ranges from 0-255. ttttttt is the token type and ranges |
| 286 * from 0-127. If e is set it means this is an extendo token (to be |
| 287 * described later). To accommodate token lengths above 255, each normal |
| 288 * token (e=0) can be followed by 0 or more extendo tokens (e=1) with |
| 289 * the same type. Right now only kLiteral Tokens have extendo tokens. |
| 290 * Each extendo token provides the next 8 higher bits for the length. |
| 291 * If a kLiteral token is followed by 2 extendo tokens, then the |
| 292 * llllllll of the next extendo token contains bits 8-15 of the length |
| 293 * and the last extendo token contains bits 16-23 of the length. |
| 294 */ |
| 295 UnicodeString tokens; |
| 296 |
| 297 /* |
| 298 * The characters of the kLiteral tokens are concatenated together here. |
| 299 * The first characters go with the first kLiteral token, the next |
| 300 * characters go with the next kLiteral token etc. |
| 301 */ |
| 302 UnicodeString literals; |
| 303 UBool hasCurrencyToken; |
| 304 UBool hasPercentToken; |
| 305 UBool hasPermillToken; |
| 306 int32_t char32Count; |
| 307 void add(ETokenType t, uint8_t count); |
| 308 |
| 309 }; |
| 310 |
| 311 /** |
| 312 * An iterator over the tokens in an AffixPattern instance. |
| 313 */ |
| 314 class U_I18N_API AffixPatternIterator : public UMemory { |
| 315 public: |
| 316 |
| 317 /** |
| 318 * Using an iterator without first calling iterator on an AffixPattern |
| 319 * instance to initialize the iterator results in |
| 320 * undefined behavior. |
| 321 */ |
| 322 AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { } |
| 323 /** |
| 324 * Advances this iterator to the next token. Returns FALSE when there |
| 325 * are no more tokens. Calling the other methods after nextToken() |
| 326 * returns FALSE results in undefined behavior. |
| 327 */ |
| 328 UBool nextToken(); |
| 329 |
| 330 /** |
| 331 * Returns the type of token. |
| 332 */ |
| 333 AffixPattern::ETokenType getTokenType() const; |
| 334 |
| 335 /** |
| 336 * For literal tokens, returns the literal string. Calling this for |
| 337 * other token types results in undefined behavior. |
| 338 * @param result replaced with a read-only alias to the literal string. |
| 339 * @return result |
| 340 */ |
| 341 UnicodeString &getLiteral(UnicodeString &result) const; |
| 342 |
| 343 /** |
| 344 * Returns the token length. Usually 1, but for currency tokens may |
| 345 * be 2 for ISO code and 3 for long form. |
| 346 */ |
| 347 int32_t getTokenLength() const; |
| 348 private: |
| 349 int32_t nextLiteralIndex; |
| 350 int32_t lastLiteralLength; |
| 351 int32_t nextTokenIndex; |
| 352 const UnicodeString *tokens; |
| 353 const UnicodeString *literals; |
| 354 friend class AffixPattern; |
| 355 AffixPatternIterator(const AffixPatternIterator &); /* private copy constructor: presumably forbids copying -- confirm no definition exists */ |
| 356 AffixPatternIterator &operator=(const AffixPatternIterator &); /* private assignment: same note as the copy constructor */ |
| 357 }; |
| 358 |
| 359 /** |
| 360 * A locale aware class that converts locale independent AffixPattern |
| 361 * instances into locale dependent PluralAffix instances. |
| 362 */ |
| 363 class U_I18N_API AffixPatternParser : public UMemory { |
| 364 public: |
| 365 AffixPatternParser(); |
| 366 AffixPatternParser(const DecimalFormatSymbols &symbols); |
| 367 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols); |
| 368 |
| 369 /** |
| 370 * Parses affixPattern appending the result to appendTo. |
| 371 * @param affixPattern The affix pattern. |
| 372 * @param currencyAffixInfo contains the currency forms. |
| 373 * @param appendTo The result of parsing affixPattern is appended here. |
| 374 * @param status any error returned here. |
| 375 * @return appendTo. |
| 376 */ |
| 377 PluralAffix &parse( |
| 378 const AffixPattern &affixPattern, |
| 379 const CurrencyAffixInfo &currencyAffixInfo, |
| 380 PluralAffix &appendTo, |
| 381 UErrorCode &status) const; |
| 382 /** Compares the percent, permill, negative and positive strings of both parsers. */ |
| 383 UBool equals(const AffixPatternParser &other) const { |
| 384 return (fPercent == other.fPercent) |
| 385 && (fPermill == other.fPermill) |
| 386 && (fNegative == other.fNegative) |
| 387 && (fPositive == other.fPositive); |
| 388 } |
| 389 |
| 390 private: |
| 391 UnicodeString fPercent; |
| 392 UnicodeString fPermill; |
| 393 UnicodeString fNegative; |
| 394 UnicodeString fPositive; |
| 395 }; |
| 396 |
| 397 |
| 398 U_NAMESPACE_END |
| 399 #endif /* #if !UCONFIG_NO_FORMATTING */ |
| 400 #endif // __AFFIX_PATTERN_PARSER_H__ |
OLD | NEW |