OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * Copyright (C) 1997-2008, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ****************************************************************************** |
| 6 * file name: nfrs.cpp |
| 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) |
| 9 * indentation:4 |
| 10 * |
| 11 * Modification history |
| 12 * Date Name Comments |
| 13 * 10/11/2001 Doug Ported from ICU4J |
| 14 */ |
| 15 |
| 16 #include "nfrs.h" |
| 17 |
| 18 #if U_HAVE_RBNF |
| 19 |
| 20 #include "unicode/uchar.h" |
| 21 #include "nfrule.h" |
| 22 #include "nfrlist.h" |
| 23 |
| 24 #ifdef RBNF_DEBUG |
| 25 #include "cmemory.h" |
| 26 #endif |
| 27 |
| 28 #include "util.h" |
| 29 |
| 30 U_NAMESPACE_BEGIN |
| 31 |
| 32 #if 0 |
| 33 // euclid's algorithm works with doubles |
| 34 // note, doubles only get us up to one quadrillion or so, which |
| 35 // isn't as much range as we get with longs. We probably still |
| 36 // want either 64-bit math, or BigInteger. |
| 37 |
| 38 static int64_t |
| 39 util_lcm(int64_t x, int64_t y) |
| 40 { |
| 41 x.abs(); |
| 42 y.abs(); |
| 43 |
| 44 if (x == 0 || y == 0) { |
| 45 return 0; |
| 46 } else { |
| 47 do { |
| 48 if (x < y) { |
| 49 int64_t t = x; x = y; y = t; |
| 50 } |
| 51 x -= y * (x/y); |
| 52 } while (x != 0); |
| 53 |
| 54 return y; |
| 55 } |
| 56 } |
| 57 |
| 58 #else |
| 59 /** |
| 60 * Calculates the least common multiple of x and y. |
| 61 */ |
| 62 static int64_t |
| 63 util_lcm(int64_t x, int64_t y) |
| 64 { |
| 65 // binary gcd algorithm from Knuth, "The Art of Computer Programming," |
| 66 // vol. 2, 1st ed., pp. 298-299 |
| 67 int64_t x1 = x; |
| 68 int64_t y1 = y; |
| 69 |
| 70 int p2 = 0; |
| 71 while ((x1 & 1) == 0 && (y1 & 1) == 0) { |
| 72 ++p2; |
| 73 x1 >>= 1; |
| 74 y1 >>= 1; |
| 75 } |
| 76 |
| 77 int64_t t; |
| 78 if ((x1 & 1) == 1) { |
| 79 t = -y1; |
| 80 } else { |
| 81 t = x1; |
| 82 } |
| 83 |
| 84 while (t != 0) { |
| 85 while ((t & 1) == 0) { |
| 86 t = t >> 1; |
| 87 } |
| 88 if (t > 0) { |
| 89 x1 = t; |
| 90 } else { |
| 91 y1 = -t; |
| 92 } |
| 93 t = x1 - y1; |
| 94 } |
| 95 |
| 96 int64_t gcd = x1 << p2; |
| 97 |
| 98 // x * y == gcd(x, y) * lcm(x, y) |
| 99 return x / gcd * y; |
| 100 } |
| 101 #endif |
| 102 |
| 103 static const UChar gPercent = 0x0025; |
| 104 static const UChar gColon = 0x003a; |
| 105 static const UChar gSemicolon = 0x003b; |
| 106 static const UChar gLineFeed = 0x000a; |
| 107 |
| 108 static const UChar gFourSpaces[] = |
| 109 { |
| 110 0x20, 0x20, 0x20, 0x20, 0 |
| 111 }; /* " " */ |
| 112 static const UChar gPercentPercent[] = |
| 113 { |
| 114 0x25, 0x25, 0 |
| 115 }; /* "%%" */ |
| 116 |
| 117 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& sta
tus) |
| 118 : name() |
| 119 , rules(0) |
| 120 , negativeNumberRule(NULL) |
| 121 , fIsFractionRuleSet(FALSE) |
| 122 , fIsPublic(FALSE) |
| 123 , fRecursionCount(0) |
| 124 { |
| 125 for (int i = 0; i < 3; ++i) { |
| 126 fractionRules[i] = NULL; |
| 127 } |
| 128 |
| 129 if (U_FAILURE(status)) { |
| 130 return; |
| 131 } |
| 132 |
| 133 UnicodeString& description = descriptions[index]; // !!! make sure index is
valid |
| 134 |
| 135 if (description.length() == 0) { |
| 136 // throw new IllegalArgumentException("Empty rule set description"); |
| 137 status = U_PARSE_ERROR; |
| 138 return; |
| 139 } |
| 140 |
| 141 // if the description begins with a rule set name (the rule set |
| 142 // name can be omitted in formatter descriptions that consist |
| 143 // of only one rule set), copy it out into our "name" member |
| 144 // and delete it from the description |
| 145 if (description.charAt(0) == gPercent) { |
| 146 int32_t pos = description.indexOf(gColon); |
| 147 if (pos == -1) { |
| 148 // throw new IllegalArgumentException("Rule set name doesn't end in
colon"); |
| 149 status = U_PARSE_ERROR; |
| 150 } else { |
| 151 name.setTo(description, 0, pos); |
| 152 while (pos < description.length() && uprv_isRuleWhiteSpace(descripti
on.charAt(++pos))) { |
| 153 } |
| 154 description.remove(0, pos); |
| 155 } |
| 156 } else { |
| 157 name.setTo(UNICODE_STRING_SIMPLE("%default")); |
| 158 } |
| 159 |
| 160 if (description.length() == 0) { |
| 161 // throw new IllegalArgumentException("Empty rule set description"); |
| 162 status = U_PARSE_ERROR; |
| 163 } |
| 164 |
| 165 fIsPublic = name.indexOf(gPercentPercent) != 0; |
| 166 |
| 167 // all of the other members of NFRuleSet are initialized |
| 168 // by parseRules() |
| 169 } |
| 170 |
| 171 void |
| 172 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* o
wner, UErrorCode& status) |
| 173 { |
| 174 // start by creating a Vector whose elements are Strings containing |
| 175 // the descriptions of the rules (one rule per element). The rules |
| 176 // are separated by semicolons (there's no escape facility: ALL |
| 177 // semicolons are rule delimiters) |
| 178 |
| 179 if (U_FAILURE(status)) { |
| 180 return; |
| 181 } |
| 182 |
| 183 // dlf - the original code kept a separate description array for no reason, |
| 184 // so I got rid of it. The loop was too complex so I simplified it. |
| 185 |
| 186 UnicodeString currentDescription; |
| 187 int32_t oldP = 0; |
| 188 while (oldP < description.length()) { |
| 189 int32_t p = description.indexOf(gSemicolon, oldP); |
| 190 if (p == -1) { |
| 191 p = description.length(); |
| 192 } |
| 193 currentDescription.setTo(description, oldP, p - oldP); |
| 194 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules,
status); |
| 195 oldP = p + 1; |
| 196 } |
| 197 |
| 198 // for rules that didn't specify a base value, their base values |
| 199 // were initialized to 0. Make another pass through the list and |
| 200 // set all those rules' base values. We also remove any special |
| 201 // rules from the list and put them into their own member variables |
| 202 int64_t defaultBaseValue = 0; |
| 203 |
| 204 // (this isn't a for loop because we might be deleting items from |
| 205 // the vector-- we want to make sure we only increment i when |
| 206 // we _didn't_ delete aything from the vector) |
| 207 uint32_t i = 0; |
| 208 while (i < rules.size()) { |
| 209 NFRule* rule = rules[i]; |
| 210 |
| 211 switch (rule->getType()) { |
| 212 // if the rule's base value is 0, fill in a default |
| 213 // base value (this will be 1 plus the preceding |
| 214 // rule's base value for regular rule sets, and the |
| 215 // same as the preceding rule's base value in fraction |
| 216 // rule sets) |
| 217 case NFRule::kNoBase: |
| 218 rule->setBaseValue(defaultBaseValue, status); |
| 219 if (!isFractionRuleSet()) { |
| 220 ++defaultBaseValue; |
| 221 } |
| 222 ++i; |
| 223 break; |
| 224 |
| 225 // if it's the negative-number rule, copy it into its own |
| 226 // data member and delete it from the list |
| 227 case NFRule::kNegativeNumberRule: |
| 228 negativeNumberRule = rules.remove(i); |
| 229 break; |
| 230 |
| 231 // if it's the improper fraction rule, copy it into the |
| 232 // correct element of fractionRules |
| 233 case NFRule::kImproperFractionRule: |
| 234 fractionRules[0] = rules.remove(i); |
| 235 break; |
| 236 |
| 237 // if it's the proper fraction rule, copy it into the |
| 238 // correct element of fractionRules |
| 239 case NFRule::kProperFractionRule: |
| 240 fractionRules[1] = rules.remove(i); |
| 241 break; |
| 242 |
| 243 // if it's the master rule, copy it into the |
| 244 // correct element of fractionRules |
| 245 case NFRule::kMasterRule: |
| 246 fractionRules[2] = rules.remove(i); |
| 247 break; |
| 248 |
| 249 // if it's a regular rule that already knows its base value, |
| 250 // check to make sure the rules are in order, and update |
| 251 // the default base value for the next rule |
| 252 default: |
| 253 if (rule->getBaseValue() < defaultBaseValue) { |
| 254 // throw new IllegalArgumentException("Rules are not in order"); |
| 255 status = U_PARSE_ERROR; |
| 256 return; |
| 257 } |
| 258 defaultBaseValue = rule->getBaseValue(); |
| 259 if (!isFractionRuleSet()) { |
| 260 ++defaultBaseValue; |
| 261 } |
| 262 ++i; |
| 263 break; |
| 264 } |
| 265 } |
| 266 } |
| 267 |
| 268 NFRuleSet::~NFRuleSet() |
| 269 { |
| 270 delete negativeNumberRule; |
| 271 delete fractionRules[0]; |
| 272 delete fractionRules[1]; |
| 273 delete fractionRules[2]; |
| 274 } |
| 275 |
| 276 static UBool |
| 277 util_equalRules(const NFRule* rule1, const NFRule* rule2) |
| 278 { |
| 279 if (rule1) { |
| 280 if (rule2) { |
| 281 return *rule1 == *rule2; |
| 282 } |
| 283 } else if (!rule2) { |
| 284 return TRUE; |
| 285 } |
| 286 return FALSE; |
| 287 } |
| 288 |
| 289 UBool |
| 290 NFRuleSet::operator==(const NFRuleSet& rhs) const |
| 291 { |
| 292 if (rules.size() == rhs.rules.size() && |
| 293 fIsFractionRuleSet == rhs.fIsFractionRuleSet && |
| 294 name == rhs.name && |
| 295 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) && |
| 296 util_equalRules(fractionRules[0], rhs.fractionRules[0]) && |
| 297 util_equalRules(fractionRules[1], rhs.fractionRules[1]) && |
| 298 util_equalRules(fractionRules[2], rhs.fractionRules[2])) { |
| 299 |
| 300 for (uint32_t i = 0; i < rules.size(); ++i) { |
| 301 if (*rules[i] != *rhs.rules[i]) { |
| 302 return FALSE; |
| 303 } |
| 304 } |
| 305 return TRUE; |
| 306 } |
| 307 return FALSE; |
| 308 } |
| 309 |
| 310 #define RECURSION_LIMIT 50 |
| 311 |
| 312 void |
| 313 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const |
| 314 { |
| 315 NFRule *rule = findNormalRule(number); |
| 316 if (rule) { // else error, but can't report it |
| 317 NFRuleSet* ncThis = (NFRuleSet*)this; |
| 318 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { |
| 319 // stop recursion |
| 320 ncThis->fRecursionCount = 0; |
| 321 } else { |
| 322 rule->doFormat(number, toAppendTo, pos); |
| 323 ncThis->fRecursionCount--; |
| 324 } |
| 325 } |
| 326 } |
| 327 |
| 328 void |
| 329 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const |
| 330 { |
| 331 NFRule *rule = findDoubleRule(number); |
| 332 if (rule) { // else error, but can't report it |
| 333 NFRuleSet* ncThis = (NFRuleSet*)this; |
| 334 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { |
| 335 // stop recursion |
| 336 ncThis->fRecursionCount = 0; |
| 337 } else { |
| 338 rule->doFormat(number, toAppendTo, pos); |
| 339 ncThis->fRecursionCount--; |
| 340 } |
| 341 } |
| 342 } |
| 343 |
| 344 NFRule* |
| 345 NFRuleSet::findDoubleRule(double number) const |
| 346 { |
| 347 // if this is a fraction rule set, use findFractionRuleSetRule() |
| 348 if (isFractionRuleSet()) { |
| 349 return findFractionRuleSetRule(number); |
| 350 } |
| 351 |
| 352 // if the number is negative, return the negative number rule |
| 353 // (if there isn't a negative-number rule, we pretend it's a |
| 354 // positive number) |
| 355 if (number < 0) { |
| 356 if (negativeNumberRule) { |
| 357 return negativeNumberRule; |
| 358 } else { |
| 359 number = -number; |
| 360 } |
| 361 } |
| 362 |
| 363 // if the number isn't an integer, we use one of the fraction rules... |
| 364 if (number != uprv_floor(number)) { |
| 365 // if the number is between 0 and 1, return the proper |
| 366 // fraction rule |
| 367 if (number < 1 && fractionRules[1]) { |
| 368 return fractionRules[1]; |
| 369 } |
| 370 // otherwise, return the improper fraction rule |
| 371 else if (fractionRules[0]) { |
| 372 return fractionRules[0]; |
| 373 } |
| 374 } |
| 375 |
| 376 // if there's a master rule, use it to format the number |
| 377 if (fractionRules[2]) { |
| 378 return fractionRules[2]; |
| 379 } |
| 380 |
| 381 // and if we haven't yet returned a rule, use findNormalRule() |
| 382 // to find the applicable rule |
| 383 int64_t r = util64_fromDouble(number + 0.5); |
| 384 return findNormalRule(r); |
| 385 } |
| 386 |
| 387 NFRule * |
| 388 NFRuleSet::findNormalRule(int64_t number) const |
| 389 { |
| 390 // if this is a fraction rule set, use findFractionRuleSetRule() |
| 391 // to find the rule (we should only go into this clause if the |
| 392 // value is 0) |
| 393 if (fIsFractionRuleSet) { |
| 394 return findFractionRuleSetRule((double)number); |
| 395 } |
| 396 |
| 397 // if the number is negative, return the negative-number rule |
| 398 // (if there isn't one, pretend the number is positive) |
| 399 if (number < 0) { |
| 400 if (negativeNumberRule) { |
| 401 return negativeNumberRule; |
| 402 } else { |
| 403 number = -number; |
| 404 } |
| 405 } |
| 406 |
| 407 // we have to repeat the preceding two checks, even though we |
| 408 // do them in findRule(), because the version of format() that |
| 409 // takes a long bypasses findRule() and goes straight to this |
| 410 // function. This function does skip the fraction rules since |
| 411 // we know the value is an integer (it also skips the master |
| 412 // rule, since it's considered a fraction rule. Skipping the |
| 413 // master rule in this function is also how we avoid infinite |
| 414 // recursion) |
| 415 |
| 416 // {dlf} unfortunately this fails if there are no rules except |
| 417 // special rules. If there are no rules, use the master rule. |
| 418 |
| 419 // binary-search the rule list for the applicable rule |
| 420 // (a rule is used for all values from its base value to |
| 421 // the next rule's base value) |
| 422 int32_t hi = rules.size(); |
| 423 if (hi > 0) { |
| 424 int32_t lo = 0; |
| 425 |
| 426 while (lo < hi) { |
| 427 int32_t mid = (lo + hi) / 2; |
| 428 if (rules[mid]->getBaseValue() == number) { |
| 429 return rules[mid]; |
| 430 } |
| 431 else if (rules[mid]->getBaseValue() > number) { |
| 432 hi = mid; |
| 433 } |
| 434 else { |
| 435 lo = mid + 1; |
| 436 } |
| 437 } |
| 438 if (hi == 0) { // bad rule set, minimum base > 0 |
| 439 return NULL; // want to throw exception here |
| 440 } |
| 441 |
| 442 NFRule *result = rules[hi - 1]; |
| 443 |
| 444 // use shouldRollBack() to see whether we need to invoke the |
| 445 // rollback rule (see shouldRollBack()'s documentation for |
| 446 // an explanation of the rollback rule). If we do, roll back |
| 447 // one rule and return that one instead of the one we'd normally |
| 448 // return |
| 449 if (result->shouldRollBack((double)number)) { |
| 450 if (hi == 1) { // bad rule set, no prior rule to rollback to from th
is base |
| 451 return NULL; |
| 452 } |
| 453 result = rules[hi - 2]; |
| 454 } |
| 455 return result; |
| 456 } |
| 457 // else use the master rule |
| 458 return fractionRules[2]; |
| 459 } |
| 460 |
| 461 /** |
| 462 * If this rule is a fraction rule set, this function is used by |
| 463 * findRule() to select the most appropriate rule for formatting |
| 464 * the number. Basically, the base value of each rule in the rule |
| 465 * set is treated as the denominator of a fraction. Whichever |
| 466 * denominator can produce the fraction closest in value to the |
| 467 * number passed in is the result. If there's a tie, the earlier |
| 468 * one in the list wins. (If there are two rules in a row with the |
| 469 * same base value, the first one is used when the numerator of the |
| 470 * fraction would be 1, and the second rule is used the rest of the |
| 471 * time. |
| 472 * @param number The number being formatted (which will always be |
| 473 * a number between 0 and 1) |
| 474 * @return The rule to use to format this number |
| 475 */ |
| 476 NFRule* |
| 477 NFRuleSet::findFractionRuleSetRule(double number) const |
| 478 { |
| 479 // the obvious way to do this (multiply the value being formatted |
| 480 // by each rule's base value until you get an integral result) |
| 481 // doesn't work because of rounding error. This method is more |
| 482 // accurate |
| 483 |
| 484 // find the least common multiple of the rules' base values |
| 485 // and multiply this by the number being formatted. This is |
| 486 // all the precision we need, and we can do all of the rest |
| 487 // of the math using integer arithmetic |
| 488 int64_t leastCommonMultiple = rules[0]->getBaseValue(); |
| 489 int64_t numerator; |
| 490 { |
| 491 for (uint32_t i = 1; i < rules.size(); ++i) { |
| 492 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBas
eValue()); |
| 493 } |
| 494 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5
); |
| 495 } |
| 496 // for each rule, do the following... |
| 497 int64_t tempDifference; |
| 498 int64_t difference = util64_fromDouble(uprv_maxMantissa()); |
| 499 int32_t winner = 0; |
| 500 for (uint32_t i = 0; i < rules.size(); ++i) { |
| 501 // "numerator" is the numerator of the fraction if the |
| 502 // denominator is the LCD. The numerator if the rule's |
| 503 // base value is the denominator is "numerator" times the |
| 504 // base value divided bythe LCD. Here we check to see if |
| 505 // that's an integer, and if not, how close it is to being |
| 506 // an integer. |
| 507 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMulti
ple; |
| 508 |
| 509 |
| 510 // normalize the result of the above calculation: we want |
| 511 // the numerator's distance from the CLOSEST multiple |
| 512 // of the LCD |
| 513 if (leastCommonMultiple - tempDifference < tempDifference) { |
| 514 tempDifference = leastCommonMultiple - tempDifference; |
| 515 } |
| 516 |
| 517 // if this is as close as we've come, keep track of how close |
| 518 // that is, and the line number of the rule that did it. If |
| 519 // we've scored a direct hit, we don't have to look at any more |
| 520 // rules |
| 521 if (tempDifference < difference) { |
| 522 difference = tempDifference; |
| 523 winner = i; |
| 524 if (difference == 0) { |
| 525 break; |
| 526 } |
| 527 } |
| 528 } |
| 529 |
| 530 // if we have two successive rules that both have the winning base |
| 531 // value, then the first one (the one we found above) is used if |
| 532 // the numerator of the fraction is 1 and the second one is used if |
| 533 // the numerator of the fraction is anything else (this lets us |
| 534 // do things like "one third"/"two thirds" without haveing to define |
| 535 // a whole bunch of extra rule sets) |
| 536 if ((unsigned)(winner + 1) < rules.size() && |
| 537 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { |
| 538 double n = ((double)rules[winner]->getBaseValue()) * number; |
| 539 if (n < 0.5 || n >= 2) { |
| 540 ++winner; |
| 541 } |
| 542 } |
| 543 |
| 544 // finally, return the winning rule |
| 545 return rules[winner]; |
| 546 } |
| 547 |
| 548 /** |
| 549 * Parses a string. Matches the string to be parsed against each |
| 550 * of its rules (with a base value less than upperBound) and returns |
| 551 * the value produced by the rule that matched the most charcters |
| 552 * in the source string. |
| 553 * @param text The string to parse |
| 554 * @param parsePosition The initial position is ignored and assumed |
| 555 * to be 0. On exit, this object has been updated to point to the |
| 556 * first character position this rule set didn't consume. |
| 557 * @param upperBound Limits the rules that can be allowed to match. |
| 558 * Only rules whose base values are strictly less than upperBound |
| 559 * are considered. |
| 560 * @return The numerical result of parsing this string. This will |
| 561 * be the matching rule's base value, composed appropriately with |
| 562 * the results of matching any of its substitutions. The object |
| 563 * will be an instance of Long if it's an integral value; otherwise, |
| 564 * it will be an instance of Double. This function always returns |
| 565 * a valid object: If nothing matched the input string at all, |
| 566 * this function returns new Long(0), and the parse position is |
| 567 * left unchanged. |
| 568 */ |
| 569 #ifdef RBNF_DEBUG |
| 570 #include <stdio.h> |
| 571 |
| 572 static void dumpUS(FILE* f, const UnicodeString& us) { |
| 573 int len = us.length(); |
| 574 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; |
| 575 if (buf != NULL) { |
| 576 us.extract(0, len, buf); |
| 577 buf[len] = 0; |
| 578 fprintf(f, "%s", buf); |
| 579 uprv_free(buf); //delete[] buf; |
| 580 } |
| 581 } |
| 582 #endif |
| 583 |
| 584 UBool |
| 585 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
d, Formattable& result) const |
| 586 { |
| 587 // try matching each rule in the rule set against the text being |
| 588 // parsed. Whichever one matches the most characters is the one |
| 589 // that determines the value we return. |
| 590 |
| 591 result.setLong(0); |
| 592 |
| 593 // dump out if there's no text to parse |
| 594 if (text.length() == 0) { |
| 595 return 0; |
| 596 } |
| 597 |
| 598 ParsePosition highWaterMark; |
| 599 ParsePosition workingPos = pos; |
| 600 |
| 601 #ifdef RBNF_DEBUG |
| 602 fprintf(stderr, "<nfrs> %x '", this); |
| 603 dumpUS(stderr, name); |
| 604 fprintf(stderr, "' text '"); |
| 605 dumpUS(stderr, text); |
| 606 fprintf(stderr, "'\n"); |
| 607 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); |
| 608 #endif |
| 609 |
| 610 // start by trying the negative number rule (if there is one) |
| 611 if (negativeNumberRule) { |
| 612 Formattable tempResult; |
| 613 #ifdef RBNF_DEBUG |
| 614 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRu
le, upperBound); |
| 615 #endif |
| 616 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBo
und, tempResult); |
| 617 #ifdef RBNF_DEBUG |
| 618 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success
, workingPos.getIndex()); |
| 619 #endif |
| 620 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { |
| 621 result = tempResult; |
| 622 highWaterMark = workingPos; |
| 623 } |
| 624 workingPos = pos; |
| 625 } |
| 626 #ifdef RBNF_DEBUG |
| 627 fprintf(stderr, "<nfrs> continue fractional with text '"); |
| 628 dumpUS(stderr, text); |
| 629 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); |
| 630 #endif |
| 631 // then try each of the fraction rules |
| 632 { |
| 633 for (int i = 0; i < 3; i++) { |
| 634 if (fractionRules[i]) { |
| 635 Formattable tempResult; |
| 636 UBool success = fractionRules[i]->doParse(text, workingPos, 0, u
pperBound, tempResult); |
| 637 if (success && (workingPos.getIndex() > highWaterMark.getIndex()
)) { |
| 638 result = tempResult; |
| 639 highWaterMark = workingPos; |
| 640 } |
| 641 workingPos = pos; |
| 642 } |
| 643 } |
| 644 } |
| 645 #ifdef RBNF_DEBUG |
| 646 fprintf(stderr, "<nfrs> continue other with text '"); |
| 647 dumpUS(stderr, text); |
| 648 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); |
| 649 #endif |
| 650 |
| 651 // finally, go through the regular rules one at a time. We start |
| 652 // at the end of the list because we want to try matching the most |
| 653 // sigificant rule first (this helps ensure that we parse |
| 654 // "five thousand three hundred six" as |
| 655 // "(five thousand) (three hundred) (six)" rather than |
| 656 // "((five thousand three) hundred) (six)"). Skip rules whose |
| 657 // base values are higher than the upper bound (again, this helps |
| 658 // limit ambiguity by making sure the rules that match a rule's |
| 659 // are less significant than the rule containing the substitutions)/ |
| 660 { |
| 661 int64_t ub = util64_fromDouble(upperBound); |
| 662 #ifdef RBNF_DEBUG |
| 663 { |
| 664 char ubstr[64]; |
| 665 util64_toa(ub, ubstr, 64); |
| 666 char ubstrhex[64]; |
| 667 util64_toa(ub, ubstrhex, 64, 16); |
| 668 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhe
x); |
| 669 } |
| 670 #endif |
| 671 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < te
xt.length();) { |
| 672 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { |
| 673 continue; |
| 674 } |
| 675 Formattable tempResult; |
| 676 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleS
et, upperBound, tempResult); |
| 677 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { |
| 678 result = tempResult; |
| 679 highWaterMark = workingPos; |
| 680 } |
| 681 workingPos = pos; |
| 682 } |
| 683 } |
| 684 #ifdef RBNF_DEBUG |
| 685 fprintf(stderr, "<nfrs> exit\n"); |
| 686 #endif |
| 687 // finally, update the parse postion we were passed to point to the |
| 688 // first character we didn't use, and return the result that |
| 689 // corresponds to that string of characters |
| 690 pos = highWaterMark; |
| 691 |
| 692 return 1; |
| 693 } |
| 694 |
| 695 void |
| 696 NFRuleSet::appendRules(UnicodeString& result) const |
| 697 { |
| 698 // the rule set name goes first... |
| 699 result.append(name); |
| 700 result.append(gColon); |
| 701 result.append(gLineFeed); |
| 702 |
| 703 // followed by the regular rules... |
| 704 for (uint32_t i = 0; i < rules.size(); i++) { |
| 705 result.append(gFourSpaces); |
| 706 rules[i]->_appendRuleText(result); |
| 707 result.append(gLineFeed); |
| 708 } |
| 709 |
| 710 // followed by the special rules (if they exist) |
| 711 if (negativeNumberRule) { |
| 712 result.append(gFourSpaces); |
| 713 negativeNumberRule->_appendRuleText(result); |
| 714 result.append(gLineFeed); |
| 715 } |
| 716 |
| 717 { |
| 718 for (uint32_t i = 0; i < 3; ++i) { |
| 719 if (fractionRules[i]) { |
| 720 result.append(gFourSpaces); |
| 721 fractionRules[i]->_appendRuleText(result); |
| 722 result.append(gLineFeed); |
| 723 } |
| 724 } |
| 725 } |
| 726 } |
| 727 |
| 728 // utility functions |
| 729 |
| 730 int64_t util64_fromDouble(double d) { |
| 731 int64_t result = 0; |
| 732 if (!uprv_isNaN(d)) { |
| 733 double mant = uprv_maxMantissa(); |
| 734 if (d < -mant) { |
| 735 d = -mant; |
| 736 } else if (d > mant) { |
| 737 d = mant; |
| 738 } |
| 739 UBool neg = d < 0; |
| 740 if (neg) { |
| 741 d = -d; |
| 742 } |
| 743 result = (int64_t)uprv_floor(d); |
| 744 if (neg) { |
| 745 result = -result; |
| 746 } |
| 747 } |
| 748 return result; |
| 749 } |
| 750 |
| 751 int64_t util64_pow(int32_t r, uint32_t e) { |
| 752 if (r == 0) { |
| 753 return 0; |
| 754 } else if (e == 0) { |
| 755 return 1; |
| 756 } else { |
| 757 int64_t n = r; |
| 758 while (--e > 0) { |
| 759 n *= r; |
| 760 } |
| 761 return n; |
| 762 } |
| 763 } |
| 764 |
| 765 static const uint8_t asciiDigits[] = { |
| 766 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, |
| 767 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, |
| 768 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, |
| 769 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, |
| 770 0x77u, 0x78u, 0x79u, 0x7au, |
| 771 }; |
| 772 |
| 773 static const UChar kUMinus = (UChar)0x002d; |
| 774 |
| 775 #ifdef RBNF_DEBUG |
| 776 static const char kMinus = '-'; |
| 777 |
| 778 static const uint8_t digitInfo[] = { |
| 779 0, 0, 0, 0, 0, 0, 0, 0, |
| 780 0, 0, 0, 0, 0, 0, 0, 0, |
| 781 0, 0, 0, 0, 0, 0, 0, 0, |
| 782 0, 0, 0, 0, 0, 0, 0, 0, |
| 783 0, 0, 0, 0, 0, 0, 0, 0, |
| 784 0, 0, 0, 0, 0, 0, 0, 0, |
| 785 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, |
| 786 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, |
| 787 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, |
| 788 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, |
| 789 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, |
| 790 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, |
| 791 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, |
| 792 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, |
| 793 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, |
| 794 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, |
| 795 }; |
| 796 |
| 797 int64_t util64_atoi(const char* str, uint32_t radix) |
| 798 { |
| 799 if (radix > 36) { |
| 800 radix = 36; |
| 801 } else if (radix < 2) { |
| 802 radix = 2; |
| 803 } |
| 804 int64_t lradix = radix; |
| 805 |
| 806 int neg = 0; |
| 807 if (*str == kMinus) { |
| 808 ++str; |
| 809 neg = 1; |
| 810 } |
| 811 int64_t result = 0; |
| 812 uint8_t b; |
| 813 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { |
| 814 result *= lradix; |
| 815 result += (int32_t)b; |
| 816 } |
| 817 if (neg) { |
| 818 result = -result; |
| 819 } |
| 820 return result; |
| 821 } |
| 822 |
| 823 int64_t util64_utoi(const UChar* str, uint32_t radix) |
| 824 { |
| 825 if (radix > 36) { |
| 826 radix = 36; |
| 827 } else if (radix < 2) { |
| 828 radix = 2; |
| 829 } |
| 830 int64_t lradix = radix; |
| 831 |
| 832 int neg = 0; |
| 833 if (*str == kUMinus) { |
| 834 ++str; |
| 835 neg = 1; |
| 836 } |
| 837 int64_t result = 0; |
| 838 UChar c; |
| 839 uint8_t b; |
| 840 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix
)) { |
| 841 result *= lradix; |
| 842 result += (int32_t)b; |
| 843 } |
| 844 if (neg) { |
| 845 result = -result; |
| 846 } |
| 847 return result; |
| 848 } |
| 849 |
| 850 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool ra
w) |
| 851 { |
| 852 if (radix > 36) { |
| 853 radix = 36; |
| 854 } else if (radix < 2) { |
| 855 radix = 2; |
| 856 } |
| 857 int64_t base = radix; |
| 858 |
| 859 char* p = buf; |
| 860 if (len && (w < 0) && (radix == 10) && !raw) { |
| 861 w = -w; |
| 862 *p++ = kMinus; |
| 863 --len; |
| 864 } else if (len && (w == 0)) { |
| 865 *p++ = (char)raw ? 0 : asciiDigits[0]; |
| 866 --len; |
| 867 } |
| 868 |
| 869 while (len && w != 0) { |
| 870 int64_t n = w / base; |
| 871 int64_t m = n * base; |
| 872 int32_t d = (int32_t)(w-m); |
| 873 *p++ = raw ? (char)d : asciiDigits[d]; |
| 874 w = n; |
| 875 --len; |
| 876 } |
| 877 if (len) { |
| 878 *p = 0; // null terminate if room for caller convenience |
| 879 } |
| 880 |
| 881 len = p - buf; |
| 882 if (*buf == kMinus) { |
| 883 ++buf; |
| 884 } |
| 885 while (--p > buf) { |
| 886 char c = *p; |
| 887 *p = *buf; |
| 888 *buf = c; |
| 889 ++buf; |
| 890 } |
| 891 |
| 892 return len; |
| 893 } |
| 894 #endif |
| 895 |
| 896 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool r
aw) |
| 897 { |
| 898 if (radix > 36) { |
| 899 radix = 36; |
| 900 } else if (radix < 2) { |
| 901 radix = 2; |
| 902 } |
| 903 int64_t base = radix; |
| 904 |
| 905 UChar* p = buf; |
| 906 if (len && (w < 0) && (radix == 10) && !raw) { |
| 907 w = -w; |
| 908 *p++ = kUMinus; |
| 909 --len; |
| 910 } else if (len && (w == 0)) { |
| 911 *p++ = (UChar)raw ? 0 : asciiDigits[0]; |
| 912 --len; |
| 913 } |
| 914 |
| 915 while (len && (w != 0)) { |
| 916 int64_t n = w / base; |
| 917 int64_t m = n * base; |
| 918 int32_t d = (int32_t)(w-m); |
| 919 *p++ = (UChar)(raw ? d : asciiDigits[d]); |
| 920 w = n; |
| 921 --len; |
| 922 } |
| 923 if (len) { |
| 924 *p = 0; // null terminate if room for caller convenience |
| 925 } |
| 926 |
| 927 len = (uint32_t)(p - buf); |
| 928 if (*buf == kUMinus) { |
| 929 ++buf; |
| 930 } |
| 931 while (--p > buf) { |
| 932 UChar c = *p; |
| 933 *p = *buf; |
| 934 *buf = c; |
| 935 ++buf; |
| 936 } |
| 937 |
| 938 return len; |
| 939 } |
| 940 |
| 941 |
| 942 U_NAMESPACE_END |
| 943 |
| 944 /* U_HAVE_RBNF */ |
| 945 #endif |
| 946 |
OLD | NEW |