Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Unified Diff: source/i18n/nfrule.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/i18n/nfrule.h ('k') | source/i18n/nfsubs.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/nfrule.cpp
diff --git a/source/i18n/nfrule.cpp b/source/i18n/nfrule.cpp
index b96a84eb91d80c474316e162fa983cf4a26ec8b5..f8ed0b6c35fa68e57555f276b467363e576b9965 100644
--- a/source/i18n/nfrule.cpp
+++ b/source/i18n/nfrule.cpp
@@ -1,6 +1,6 @@
/*
******************************************************************************
-* Copyright (C) 1997-2011, International Business Machines
+* Copyright (C) 1997-2014, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* file name: nfrule.cpp
@@ -17,8 +17,11 @@
#if U_HAVE_RBNF
+#include "unicode/localpointer.h"
#include "unicode/rbnf.h"
#include "unicode/tblcoll.h"
+#include "unicode/plurfmt.h"
+#include "unicode/upluralrules.h"
#include "unicode/coleitr.h"
#include "unicode/uchar.h"
#include "nfrs.h"
@@ -36,13 +39,17 @@ NFRule::NFRule(const RuleBasedNumberFormat* _rbnf)
, sub1(NULL)
, sub2(NULL)
, formatter(_rbnf)
+ , rulePatternFormat(NULL)
{
}
NFRule::~NFRule()
{
- delete sub1;
- delete sub2;
+ if (sub1 != sub2) {
+ delete sub2;
+ }
+ delete sub1;
+ delete rulePatternFormat;
}
static const UChar gLeftBracket = 0x005b;
@@ -65,6 +72,9 @@ static const UChar gXDotX[] = {0x78, 0x2E, 0x78, 0}; /* "x.x"
static const UChar gXDotZero[] = {0x78, 0x2E, 0x30, 0}; /* "x.0" */
static const UChar gZeroDotX[] = {0x30, 0x2E, 0x78, 0}; /* "0.x" */
+static const UChar gDollarOpenParenthesis[] = {0x24, 0x28, 0}; /* "$(" */
+static const UChar gClosedParenthesisDollar[] = {0x29, 0x24, 0}; /* ")$" */
+
static const UChar gLessLess[] = {0x3C, 0x3C, 0}; /* "<<" */
static const UChar gLessPercent[] = {0x3C, 0x25, 0}; /* "<%" */
static const UChar gLessHash[] = {0x3C, 0x23, 0}; /* "<#" */
@@ -116,8 +126,7 @@ NFRule::makeRules(UnicodeString& description,
if (brack1 == -1 || brack2 == -1 || brack1 > brack2
|| rule1->getType() == kProperFractionRule
|| rule1->getType() == kNegativeNumberRule) {
- rule1->ruleText = description;
- rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status);
+ rule1->extractSubstitutions(ruleSet, description, predecessor, status);
rules.add(rule1);
} else {
// if the description does contain a matched pair of brackets,
@@ -177,8 +186,7 @@ NFRule::makeRules(UnicodeString& description,
if (brack2 + 1 < description.length()) {
sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);
}
- rule2->ruleText.setTo(sbuf);
- rule2->extractSubstitutions(ruleSet, predecessor, rbnf, status);
+ rule2->extractSubstitutions(ruleSet, sbuf, predecessor, status);
}
// rule1's text includes the text in the brackets but omits
@@ -189,8 +197,7 @@ NFRule::makeRules(UnicodeString& description,
if (brack2 + 1 < description.length()) {
sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);
}
- rule1->ruleText.setTo(sbuf);
- rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status);
+ rule1->extractSubstitutions(ruleSet, sbuf, predecessor, status);
// if we only have one rule, return it; if we have two, return
// a two-element array containing them (notice that rule2 goes
@@ -369,13 +376,45 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
*/
void
NFRule::extractSubstitutions(const NFRuleSet* ruleSet,
+ const UnicodeString &ruleText,
const NFRule* predecessor,
- const RuleBasedNumberFormat* rbnf,
UErrorCode& status)
{
- if (U_SUCCESS(status)) {
- sub1 = extractSubstitution(ruleSet, predecessor, rbnf, status);
- sub2 = extractSubstitution(ruleSet, predecessor, rbnf, status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ this->ruleText = ruleText;
+ this->rulePatternFormat = NULL;
+ sub1 = extractSubstitution(ruleSet, predecessor, status);
+ if (sub1 == NULL || sub1->isNullSubstitution()) {
+ // Small optimization. There is no need to create a redundant NullSubstitution.
+ sub2 = sub1;
+ }
+ else {
+ sub2 = extractSubstitution(ruleSet, predecessor, status);
+ }
+ int32_t pluralRuleStart = this->ruleText.indexOf(gDollarOpenParenthesis, -1, 0);
+ int32_t pluralRuleEnd = (pluralRuleStart >= 0 ? this->ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) : -1);
+ if (pluralRuleEnd >= 0) {
+ int32_t endType = this->ruleText.indexOf(gComma, pluralRuleStart);
+ if (endType < 0) {
+ status = U_PARSE_ERROR;
+ return;
+ }
+ UnicodeString type(this->ruleText.tempSubString(pluralRuleStart + 2, endType - pluralRuleStart - 2));
+ UPluralType pluralType;
+ if (type.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) {
+ pluralType = UPLURAL_TYPE_CARDINAL;
+ }
+ else if (type.startsWith(UNICODE_STRING_SIMPLE("ordinal"))) {
+ pluralType = UPLURAL_TYPE_ORDINAL;
+ }
+ else {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ rulePatternFormat = formatter->createPluralFormat(pluralType,
+ this->ruleText.tempSubString(endType + 1, pluralRuleEnd - endType - 1), status);
}
}
@@ -394,7 +433,6 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet,
NFSubstitution *
NFRule::extractSubstitution(const NFRuleSet* ruleSet,
const NFRule* predecessor,
- const RuleBasedNumberFormat* rbnf,
UErrorCode& status)
{
NFSubstitution* result = NULL;
@@ -408,7 +446,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet,
// at the end of the rule text
if (subStart == -1) {
return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor,
- ruleSet, rbnf, UnicodeString(), status);
+ ruleSet, this->formatter, UnicodeString(), status);
}
// special-case the ">>>" token, since searching for the > at the
@@ -436,7 +474,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet,
// at the end of the rule
if (subEnd == -1) {
return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor,
- ruleSet, rbnf, UnicodeString(), status);
+ ruleSet, this->formatter, UnicodeString(), status);
}
// if we get here, we have a real substitution token (or at least
@@ -445,7 +483,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet,
UnicodeString subToken;
subToken.setTo(ruleText, subStart, subEnd + 1 - subStart);
result = NFSubstitution::makeSubstitution(subStart, this, predecessor, ruleSet,
- rbnf, subToken, status);
+ this->formatter, subToken, status);
// remove the substitution from the rule text
ruleText.removeBetween(subStart, subEnd+1);
@@ -644,16 +682,39 @@ NFRule::_appendRuleText(UnicodeString& result) const
* should be inserted
*/
void
-NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos) const
+NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const
{
// first, insert the rule's rule text into toInsertInto at the
// specified position, then insert the results of the substitutions
// into the right places in toInsertInto (notice we do the
// substitutions in reverse order so that the offsets don't get
// messed up)
- toInsertInto.insert(pos, ruleText);
- sub2->doSubstitution(number, toInsertInto, pos);
- sub1->doSubstitution(number, toInsertInto, pos);
+ int32_t pluralRuleStart = ruleText.length();
+ int32_t lengthOffset = 0;
+ if (!rulePatternFormat) {
+ toInsertInto.insert(pos, ruleText);
+ }
+ else {
+ pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0);
+ int pluralRuleEnd = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart);
+ int initialLength = toInsertInto.length();
+ if (pluralRuleEnd < ruleText.length() - 1) {
+ toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2));
+ }
+ toInsertInto.insert(pos,
+ rulePatternFormat->format((int32_t)(number/uprv_pow(radix, exponent)), status));
+ if (pluralRuleStart > 0) {
+ toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart));
+ }
+ lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength);
+ }
+
+ if (!sub2->isNullSubstitution()) {
+ sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), status);
+ }
+ if (!sub1->isNullSubstitution()) {
+ sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), status);
+ }
}
/**
@@ -666,7 +727,7 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos) const
* should be inserted
*/
void
-NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const
+NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const
{
// first, insert the rule's rule text into toInsertInto at the
// specified position, then insert the results of the substitutions
@@ -674,9 +735,32 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const
// [again, we have two copies of this routine that do the same thing
// so that we don't sacrifice precision in a long by casting it
// to a double]
- toInsertInto.insert(pos, ruleText);
- sub2->doSubstitution(number, toInsertInto, pos);
- sub1->doSubstitution(number, toInsertInto, pos);
+ int32_t pluralRuleStart = ruleText.length();
+ int32_t lengthOffset = 0;
+ if (!rulePatternFormat) {
+ toInsertInto.insert(pos, ruleText);
+ }
+ else {
+ pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0);
+ int pluralRuleEnd = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart);
+ int initialLength = toInsertInto.length();
+ if (pluralRuleEnd < ruleText.length() - 1) {
+ toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2));
+ }
+ toInsertInto.insert(pos,
+ rulePatternFormat->format((int32_t)(number/uprv_pow(radix, exponent)), status));
+ if (pluralRuleStart > 0) {
+ toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart));
+ }
+ lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength);
+ }
+
+ if (!sub2->isNullSubstitution()) {
+ sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), status);
+ }
+ if (!sub1->isNullSubstitution()) {
+ sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), status);
+ }
}
/**
@@ -1136,16 +1220,17 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr
// isn't a RuleBasedCollator, because RuleBasedCollator defines
// the CollationElementIterator protocol. Hopefully, this
// will change someday.)
- RuleBasedCollator* collator = (RuleBasedCollator*)formatter->getCollator();
- CollationElementIterator* strIter = collator->createCollationElementIterator(str);
- CollationElementIterator* prefixIter = collator->createCollationElementIterator(prefix);
+ const RuleBasedCollator* collator = formatter->getCollator();
+ if (collator == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ LocalPointer<CollationElementIterator> strIter(collator->createCollationElementIterator(str));
+ LocalPointer<CollationElementIterator> prefixIter(collator->createCollationElementIterator(prefix));
// Check for memory allocation error.
- if (collator == NULL || strIter == NULL || prefixIter == NULL) {
- delete collator;
- delete strIter;
- delete prefixIter;
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
+ if (strIter.isNull() || prefixIter.isNull()) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
}
UErrorCode err = U_ZERO_ERROR;
@@ -1197,8 +1282,6 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr
// if skipping over ignorables brought us to the end
// of the target string, we didn't match and return 0
if (oStr == CollationElementIterator::NULLORDER) {
- delete prefixIter;
- delete strIter;
return 0;
}
@@ -1207,8 +1290,6 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr
// get a mismatch, dump out and return 0
if (CollationElementIterator::primaryOrder(oStr)
!= CollationElementIterator::primaryOrder(oPrefix)) {
- delete prefixIter;
- delete strIter;
return 0;
// otherwise, advance to the next character in each string
@@ -1228,9 +1309,6 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr
#ifdef RBNF_DEBUG
fprintf(stderr, "prefix length: %d\n", result);
#endif
- delete prefixIter;
- delete strIter;
-
return result;
#if 0
//----------------------------------------------------------------
@@ -1314,107 +1392,84 @@ NFRule::findText(const UnicodeString& str,
int32_t startingAt,
int32_t* length) const
{
-#if !UCONFIG_NO_COLLATION
- // if lenient parsing is turned off, this is easy: just call
- // String.indexOf() and we're done
+ if (rulePatternFormat) {
+ Formattable result;
+ FieldPosition position(UNUM_INTEGER_FIELD);
+ position.setBeginIndex(startingAt);
+ rulePatternFormat->parseType(str, this, result, position);
+ int start = position.getBeginIndex();
+ if (start >= 0) {
+ int32_t pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0);
+ int32_t pluralRuleSuffix = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) + 2;
+ int32_t matchLen = position.getEndIndex() - start;
+ UnicodeString prefix(ruleText.tempSubString(0, pluralRuleStart));
+ UnicodeString suffix(ruleText.tempSubString(pluralRuleSuffix));
+ if (str.compare(start - prefix.length(), prefix.length(), prefix, 0, prefix.length()) == 0
+ && str.compare(start + matchLen, suffix.length(), suffix, 0, suffix.length()) == 0)
+ {
+ *length = matchLen + prefix.length() + suffix.length();
+ return start - prefix.length();
+ }
+ }
+ *length = 0;
+ return -1;
+ }
if (!formatter->isLenient()) {
+ // if lenient parsing is turned off, this is easy: just call
+ // String.indexOf() and we're done
*length = key.length();
return str.indexOf(key, startingAt);
-
+ }
+ else {
// but if lenient parsing is turned ON, we've got some work
// ahead of us
- } else
-#endif
- {
- //----------------------------------------------------------------
- // JDK 1.1 HACK (take out of 1.2-specific code)
-
- // in JDK 1.2, CollationElementIterator provides us with an
- // API to map between character offsets and collation elements
- // and we can do this by marching through the string comparing
- // collation elements. We can't do that in JDK 1.1. Insted,
- // we have to go through this horrible slow mess:
- int32_t p = startingAt;
- int32_t keyLen = 0;
-
- // basically just isolate smaller and smaller substrings of
- // the target string (each running to the end of the string,
- // and with the first one running from startingAt to the end)
- // and then use prefixLength() to see if the search key is at
- // the beginning of each substring. This is excruciatingly
- // slow, but it will locate the key and tell use how long the
- // matching text was.
- UnicodeString temp;
- UErrorCode status = U_ZERO_ERROR;
- while (p < str.length() && keyLen == 0) {
- temp.setTo(str, p, str.length() - p);
- keyLen = prefixLength(temp, key, status);
- if (U_FAILURE(status)) {
- break;
- }
- if (keyLen != 0) {
- *length = keyLen;
- return p;
- }
- ++p;
- }
- // if we make it to here, we didn't find it. Return -1 for the
- // location. The length should be ignored, but set it to 0,
- // which should be "safe"
- *length = 0;
- return -1;
+ return findTextLenient(str, key, startingAt, length);
+ }
+}
- //----------------------------------------------------------------
- // JDK 1.2 version of this routine
- //RuleBasedCollator collator = (RuleBasedCollator)formatter.getCollator();
- //
- //CollationElementIterator strIter = collator.getCollationElementIterator(str);
- //CollationElementIterator keyIter = collator.getCollationElementIterator(key);
- //
- //int keyStart = -1;
- //
- //str.setOffset(startingAt);
- //
- //int oStr = strIter.next();
- //int oKey = keyIter.next();
- //while (oKey != CollationElementIterator.NULLORDER) {
- // while (oStr != CollationElementIterator.NULLORDER &&
- // CollationElementIterator.primaryOrder(oStr) == 0)
- // oStr = strIter.next();
- //
- // while (oKey != CollationElementIterator.NULLORDER &&
- // CollationElementIterator.primaryOrder(oKey) == 0)
- // oKey = keyIter.next();
- //
- // if (oStr == CollationElementIterator.NULLORDER) {
- // return new int[] { -1, 0 };
- // }
- //
- // if (oKey == CollationElementIterator.NULLORDER) {
- // break;
- // }
- //
- // if (CollationElementIterator.primaryOrder(oStr) ==
- // CollationElementIterator.primaryOrder(oKey)) {
- // keyStart = strIter.getOffset();
- // oStr = strIter.next();
- // oKey = keyIter.next();
- // } else {
- // if (keyStart != -1) {
- // keyStart = -1;
- // keyIter.reset();
- // } else {
- // oStr = strIter.next();
- // }
- // }
- //}
- //
- //if (oKey == CollationElementIterator.NULLORDER) {
- // return new int[] { keyStart, strIter.getOffset() - keyStart };
- //} else {
- // return new int[] { -1, 0 };
- //}
+int32_t
+NFRule::findTextLenient(const UnicodeString& str,
+ const UnicodeString& key,
+ int32_t startingAt,
+ int32_t* length) const
+{
+ //----------------------------------------------------------------
+ // JDK 1.1 HACK (take out of 1.2-specific code)
+
+ // in JDK 1.2, CollationElementIterator provides us with an
+ // API to map between character offsets and collation elements
+ // and we can do this by marching through the string comparing
+ // collation elements. We can't do that in JDK 1.1. Instead,
+ // we have to go through this horrible slow mess:
+ int32_t p = startingAt;
+ int32_t keyLen = 0;
+
+ // basically just isolate smaller and smaller substrings of
+ // the target string (each running to the end of the string,
+ // and with the first one running from startingAt to the end)
+ // and then use prefixLength() to see if the search key is at
+ // the beginning of each substring. This is excruciatingly
+ // slow, but it will locate the key and tell us how long the
+ // matching text was.
+ UnicodeString temp;
+ UErrorCode status = U_ZERO_ERROR;
+ while (p < str.length() && keyLen == 0) {
+ temp.setTo(str, p, str.length() - p);
+ keyLen = prefixLength(temp, key, status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+ if (keyLen != 0) {
+ *length = keyLen;
+ return p;
+ }
+ ++p;
}
+ // if we make it to here, we didn't find it. Return -1 for the
+ // location. The length should be ignored, but set it to 0,
+ // which should be "safe"
+ *length = 0;
+ return -1;
}
/**
@@ -1438,15 +1493,17 @@ NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const
// a collation element iterator and make sure each collation
// element is 0 (ignorable) at the primary level
if (formatter->isLenient()) {
- RuleBasedCollator* collator = (RuleBasedCollator*)(formatter->getCollator());
- CollationElementIterator* iter = collator->createCollationElementIterator(str);
-
+ const RuleBasedCollator* collator = formatter->getCollator();
+ if (collator == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ LocalPointer<CollationElementIterator> iter(collator->createCollationElementIterator(str));
+
// Memory allocation error check.
- if (collator == NULL || iter == NULL) {
- delete collator;
- delete iter;
- status = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
+ if (iter.isNull()) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
}
UErrorCode err = U_ZERO_ERROR;
@@ -1456,7 +1513,6 @@ NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const
o = iter->next(err);
}
- delete iter;
return o == CollationElementIterator::NULLORDER;
}
#endif
@@ -1470,5 +1526,3 @@ U_NAMESPACE_END
/* U_HAVE_RBNF */
#endif
-
-
« no previous file with comments | « source/i18n/nfrule.h ('k') | source/i18n/nfsubs.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698