Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(799)

Unified Diff: source/common/dictbe.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/common/common.vcxproj.filters ('k') | source/common/filteredbrk.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/common/dictbe.cpp
diff --git a/source/common/dictbe.cpp b/source/common/dictbe.cpp
index 88e9139ecea133a2aeee2eb082aab6469f696a72..f1c874d4ad16445578ae4b6d4889c6d9b2620844 100644
--- a/source/common/dictbe.cpp
+++ b/source/common/dictbe.cpp
@@ -1,6 +1,6 @@
/**
*******************************************************************************
- * Copyright (C) 2006-2014, International Business Machines Corporation
+ * Copyright (C) 2006-2015, International Business Machines Corporation
* and others. All Rights Reserved.
*******************************************************************************
*/
@@ -832,11 +832,11 @@ foundBest:
static const int32_t KHMER_LOOKAHEAD = 3;
// Will not combine a non-word with a preceding dictionary word longer than this
-static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 10;
+static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3;
// Will not combine a non-word that shares at least this much prefix with a
// dictionary word, with a preceding word
-static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 5;
+static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3;
// Minimum word size
static const int32_t KHMER_MIN_WORD = 2;
@@ -1138,12 +1138,12 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
return 0;
}
- // UnicodeString version of input UText, NFKC normalized in necessary.
- UnicodeString *inString;
+ // UnicodeString version of input UText, NFKC normalized if necessary.
+ UnicodeString inString;
// inputMap[inStringIndex] = corresponding native index from UText inText.
// If NULL then mapping is 1:1
- UVector32 *inputMap = NULL;
+ LocalPointer<UVector32> inputMap;
UErrorCode status = U_ZERO_ERROR;
@@ -1153,12 +1153,12 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
inText->chunkNativeStart <= rangeStart &&
inText->chunkNativeLimit >= rangeEnd &&
inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) {
-
- // Input UTtxt is in one contiguous UTF-16 chunk.
- // Use Read-only aliasing UnicodeString constructor on it.
- inString = new UnicodeString(FALSE,
- inText->chunkContents + rangeStart - inText->chunkNativeStart,
- rangeEnd - rangeStart);
+
+ // Input UText is in one contiguous UTF-16 chunk.
+ // Use Read-only aliasing UnicodeString.
+ inString.setTo(FALSE,
+ inText->chunkContents + rangeStart - inText->chunkNativeStart,
+ rangeEnd - rangeStart);
} else {
// Copy the text from the original inText (UText) to inString (UnicodeString).
// Create a map from UnicodeString indices -> UText offsets.
@@ -1168,14 +1168,16 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
if (limit > utext_nativeLength(inText)) {
limit = utext_nativeLength(inText);
}
- inString = new UnicodeString;
- inputMap = new UVector32(status);
+ inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
+ if (U_FAILURE(status)) {
+ return 0;
+ }
while (utext_getNativeIndex(inText) < limit) {
int32_t nativePosition = utext_getNativeIndex(inText);
UChar32 c = utext_next32(inText);
U_ASSERT(c != U_SENTINEL);
- inString->append(c);
- while (inputMap->size() < inString->length()) {
+ inString.append(c);
+ while (inputMap->size() < inString.length()) {
inputMap->addElement(nativePosition, status);
}
}
@@ -1183,67 +1185,70 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
- if (!nfkcNorm2->isNormalized(*inString, status)) {
- UnicodeString *normalizedInput = new UnicodeString();
+ if (!nfkcNorm2->isNormalized(inString, status)) {
+ UnicodeString normalizedInput;
// normalizedMap[normalizedInput position] == original UText position.
- UVector32 *normalizedMap = new UVector32(status);
+ LocalPointer<UVector32> normalizedMap(new UVector32(status), status);
if (U_FAILURE(status)) {
return 0;
}
UnicodeString fragment;
UnicodeString normalizedFragment;
- for (int32_t srcI = 0; srcI < inString->length();) { // Once per normalization chunk
+ for (int32_t srcI = 0; srcI < inString.length();) { // Once per normalization chunk
fragment.remove();
int32_t fragmentStartI = srcI;
- UChar32 c = inString->char32At(srcI);
+ UChar32 c = inString.char32At(srcI);
for (;;) {
fragment.append(c);
- srcI = inString->moveIndex32(srcI, 1);
- if (srcI == inString->length()) {
+ srcI = inString.moveIndex32(srcI, 1);
+ if (srcI == inString.length()) {
break;
}
- c = inString->char32At(srcI);
+ c = inString.char32At(srcI);
if (nfkcNorm2->hasBoundaryBefore(c)) {
break;
}
}
nfkcNorm2->normalize(fragment, normalizedFragment, status);
- normalizedInput->append(normalizedFragment);
+ normalizedInput.append(normalizedFragment);
// Map every position in the normalized chunk to the start of the chunk
// in the original input.
- int32_t fragmentOriginalStart = inputMap? inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeStart;
- while (normalizedMap->size() < normalizedInput->length()) {
+ int32_t fragmentOriginalStart = inputMap.isValid() ?
+ inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeStart;
+ while (normalizedMap->size() < normalizedInput.length()) {
normalizedMap->addElement(fragmentOriginalStart, status);
if (U_FAILURE(status)) {
break;
}
}
}
- U_ASSERT(normalizedMap->size() == normalizedInput->length());
- int32_t nativeEnd = inputMap? inputMap->elementAti(inString->length()) : inString->length()+rangeStart;
+ U_ASSERT(normalizedMap->size() == normalizedInput.length());
+ int32_t nativeEnd = inputMap.isValid() ?
+ inputMap->elementAti(inString.length()) : inString.length()+rangeStart;
normalizedMap->addElement(nativeEnd, status);
- delete inputMap;
- inputMap = normalizedMap;
- delete inString;
- inString = normalizedInput;
+ inputMap.moveFrom(normalizedMap);
+ inString.moveFrom(normalizedInput);
}
- int32_t numCodePts = inString->countChar32();
- if (numCodePts != inString->length()) {
+ int32_t numCodePts = inString.countChar32();
+ if (numCodePts != inString.length()) {
// There are supplementary characters in the input.
// The dictionary will produce boundary positions in terms of code point indexes,
// not in terms of code unit string indexes.
// Use the inputMap mechanism to take care of this in addition to indexing differences
// from normalization and/or UTF-8 input.
- UBool hadExistingMap = (inputMap != NULL);
+ UBool hadExistingMap = inputMap.isValid();
if (!hadExistingMap) {
- inputMap = new UVector32(status);
+ inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
+ if (U_FAILURE(status)) {
+ return 0;
+ }
}
int32_t cpIdx = 0;
- for (int32_t cuIdx = 0; ; cuIdx = inString->moveIndex32(cuIdx, 1)) {
+ for (int32_t cuIdx = 0; ; cuIdx = inString.moveIndex32(cuIdx, 1)) {
U_ASSERT(cuIdx >= cpIdx);
if (hadExistingMap) {
inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx);
@@ -1251,7 +1256,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
inputMap->addElement(cuIdx+rangeStart, status);
}
cpIdx++;
- if (cuIdx == inString->length()) {
+ if (cuIdx == inString.length()) {
break;
}
}
@@ -1280,7 +1285,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
lengths.setSize(numCodePts);
UText fu = UTEXT_INITIALIZER;
- utext_openUnicodeString(&fu, inString, &status);
+ utext_openUnicodeString(&fu, &inString, &status);
// Dynamic programming to find the best segmentation.
@@ -1288,7 +1293,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// ix is the corresponding string (code unit) index.
// They differ when the string contains supplementary characters.
int32_t ix = 0;
- for (int32_t i = 0; i < numCodePts; ++i, ix = inString->moveIndex32(ix, 1)) {
+ for (int32_t i = 0; i < numCodePts; ++i, ix = inString.moveIndex32(ix, 1)) {
if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
continue;
}
@@ -1306,7 +1311,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// Exclude Korean characters from this treatment, as they should be left
// together by default.
if ((count == 0 || lengths.elementAti(0) != 1) &&
- !fHangulWordSet.contains(inString->char32At(ix))) {
+ !fHangulWordSet.contains(inString.char32At(ix))) {
values.setElementAt(maxSnlp, count); // 255
lengths.setElementAt(1, count++);
}
@@ -1327,14 +1332,14 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// specified in the katakanaCost table according to its length.
bool is_prev_katakana = false;
- bool is_katakana = isKatakana(inString->char32At(ix));
+ bool is_katakana = isKatakana(inString.char32At(ix));
int32_t katakanaRunLength = 1;
if (!is_prev_katakana && is_katakana) {
- int32_t j = inString->moveIndex32(ix, 1);
+ int32_t j = inString.moveIndex32(ix, 1);
// Find the end of the continuous run of Katakana characters
- while (j < inString->length() && katakanaRunLength < kMaxKatakanaGroupLength &&
- isKatakana(inString->char32At(j))) {
- j = inString->moveIndex32(j, 1);
+ while (j < inString.length() && katakanaRunLength < kMaxKatakanaGroupLength &&
+ isKatakana(inString.char32At(j))) {
+ j = inString.moveIndex32(j, 1);
katakanaRunLength++;
}
if (katakanaRunLength < kMaxKatakanaGroupLength) {
@@ -1380,14 +1385,14 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// while reversing t_boundary and pushing values to foundBreaks.
for (int32_t i = numBreaks-1; i >= 0; i--) {
int32_t cpPos = t_boundary.elementAti(i);
- int32_t utextPos = inputMap ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
+ int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
// Boundaries are added to foundBreaks output in ascending order.
U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos);
foundBreaks.push(utextPos, status);
}
- delete inString;
- delete inputMap;
+ // inString goes out of scope
+ // inputMap goes out of scope
return numBreaks;
}
#endif
« no previous file with comments | « source/common/common.vcxproj.filters ('k') | source/common/filteredbrk.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698