Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Unified Diff: source/common/rbbi.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/common/putilimp.h ('k') | source/common/rbbidata.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/common/rbbi.cpp
diff --git a/source/common/rbbi.cpp b/source/common/rbbi.cpp
index 6ab57a7c1175d5769e091c4c70edf8b09b364e28..19494af26a564a38909aac4af915be6d459f8b9c 100644
--- a/source/common/rbbi.cpp
+++ b/source/common/rbbi.cpp
@@ -1,6 +1,6 @@
/*
***************************************************************************
-* Copyright (C) 1999-2013 International Business Machines Corporation
+* Copyright (C) 1999-2014 International Business Machines Corporation
* and others. All rights reserved.
***************************************************************************
*/
@@ -518,8 +518,8 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U
/**
- * Sets the current iteration position to the beginning of the text.
- * @return The offset of the beginning of the text.
+ * Sets the current iteration position to the beginning of the text, position zero.
+ * @return The new iterator position, which is zero.
*/
int32_t RuleBasedBreakIterator::first(void) {
reset();
@@ -592,6 +592,7 @@ int32_t RuleBasedBreakIterator::next(void) {
}
int32_t startPos = current();
+ fDictionaryCharCount = 0;
int32_t result = handleNext(fData->fForwardTable);
if (fDictionaryCharCount > 0) {
result = checkDictionary(startPos, result, FALSE);
@@ -646,7 +647,6 @@ int32_t RuleBasedBreakIterator::previous(void) {
// break position before the current position (we back our internal
// iterator up one step to prevent handlePrevious() from returning
// the current position), but not necessarily the last one before
-
// where we started
int32_t start = current();
@@ -679,11 +679,11 @@ int32_t RuleBasedBreakIterator::previous(void) {
// the result position that we are to return (in lastResult.) If
// the backwards rules overshot and the above loop had to do two or more
// next()s to move up to the desired return position, we will have a valid
- // tag value. But, if handlePrevious() took us to exactly the correct result positon,
+ // tag value. But, if handlePrevious() took us to exactly the correct result position,
// we won't have a tag value for that position, which is only set by handleNext().
- // set the current iteration position to be the last break position
- // before where we started, and then return that value
+ // Set the current iteration position to be the last break position
+ // before where we started, and then return that value.
utext_setNativeIndex(fText, lastResult);
fLastRuleStatusIndex = lastTag; // for use by getRuleStatus()
fLastStatusIndexValid = breakTagValid;
@@ -701,6 +701,22 @@ int32_t RuleBasedBreakIterator::previous(void) {
* @return The position of the first break after the current position.
*/
int32_t RuleBasedBreakIterator::following(int32_t offset) {
+ // if the offset passed in is already past the end of the text,
+ // just return DONE; if it's before the beginning, return the
+ // text's starting offset
+ if (fText == NULL || offset >= utext_nativeLength(fText)) {
+ last();
+ return next();
+ }
+ else if (offset < 0) {
+ return first();
+ }
+
+ // Move requested offset to a code point start. It might be on a trail surrogate,
+ // or on a trail byte if the input is UTF-8.
+ utext_setNativeIndex(fText, offset);
+ offset = utext_getNativeIndex(fText);
+
// if we have cached break positions and offset is in the range
// covered by them, use them
// TODO: could use binary search
@@ -722,20 +738,7 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
}
}
- // if the offset passed in is already past the end of the text,
- // just return DONE; if it's before the beginning, return the
- // text's starting offset
- fLastRuleStatusIndex = 0;
- fLastStatusIndexValid = TRUE;
- if (fText == NULL || offset >= utext_nativeLength(fText)) {
- last();
- return next();
- }
- else if (offset < 0) {
- return first();
- }
-
- // otherwise, set our internal iteration position (temporarily)
+ // Set our internal iteration position (temporarily)
// to the position passed in. If this is the _beginning_ position,
// then we can just use next() to get our return value
@@ -747,6 +750,7 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
// move forward one codepoint to prepare for moving back to a
// safe point.
// this handles offset being in the middle of a supplementary character
+ // TODO: is this still needed, with move to code point boundary handled above?
(void)UTEXT_NEXT32(fText);
// handlePrevious will move most of the time to < 1 boundary away
handlePrevious(fData->fSafeRevTable);
@@ -809,6 +813,21 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
* @return The position of the last boundary before the starting position.
*/
int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
+ // if the offset passed in is already past the end of the text,
+ // return the result of last(); if it's before the beginning, return the
+ // text's starting offset
+ if (fText == NULL || offset > utext_nativeLength(fText)) {
+ return last();
+ }
+ else if (offset < 0) {
+ return first();
+ }
+
+ // Move requested offset to a code point start. It might be on a trail surrogate,
+ // or on a trail byte if the input is UTF-8.
+ utext_setNativeIndex(fText, offset);
+ offset = utext_getNativeIndex(fText);
+
// if we have cached break positions and offset is in the range
// covered by them, use them
if (fCachedBreakPositions != NULL) {
@@ -834,17 +853,6 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
}
}
- // if the offset passed in is already past the end of the text,
- // just return DONE; if it's before the beginning, return the
- // text's starting offset
- if (fText == NULL || offset > utext_nativeLength(fText)) {
- // return BreakIterator::DONE;
- return last();
- }
- else if (offset < 0) {
- return first();
- }
-
// if we start by updating the current iteration position to the
// position specified by the caller, we can just use previous()
// to carry out this operation
@@ -1578,30 +1586,6 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
return (reverse ? startPos : endPos);
}
- // Bug 5532. The dictionary code will crash if the input text is UTF-8
- // because native indexes are different from UTF-16 indexes.
- // Temporary hack: skip dictionary lookup for UTF-8 encoded text.
- // It wont give the right breaks, but it's better than a crash.
- //
- // Check the type of the UText by checking its pFuncs field, which
- // is UText's function dispatch table. It will be the same for all
- // UTF-8 UTexts and different for any other UText type.
- //
- // We have no other type of UText available with non-UTF-16 native indexing.
- // This whole check will go away once the dictionary code is fixed.
- static const void *utext_utf8Funcs;
- if (utext_utf8Funcs == NULL) {
- // Cache the UTF-8 UText function pointer value.
- UErrorCode status = U_ZERO_ERROR;
- UText tempUText = UTEXT_INITIALIZER;
- utext_openUTF8(&tempUText, NULL, 0, &status);
- utext_utf8Funcs = tempUText.pFuncs;
- utext_close(&tempUText);
- }
- if (fText->pFuncs == utext_utf8Funcs) {
- return (reverse ? startPos : endPos);
- }
-
// Starting from the starting point, scan towards the proposed result,
// looking for the first dictionary character (which may be the one
// we're on, if we're starting in the middle of a range).
@@ -1703,6 +1687,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
// If we found breaks, build a new break cache. The first and last entries must
// be the original starting and ending position.
if (foundBreakCount > 0) {
+ U_ASSERT(foundBreakCount == breaks.size());
int32_t totalBreaks = foundBreakCount;
if (startPos < breaks.elementAti(0)) {
totalBreaks += 1;
@@ -1742,8 +1727,6 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
return (reverse ? startPos : endPos);
}
-// defined in ucln_cmn.h
-
U_NAMESPACE_END
« no previous file with comments | « source/common/putilimp.h ('k') | source/common/rbbidata.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698