icu46/source/common/rbbi.cpp - Issue 6370014: CJK segmentation patch for ICU 4.6...

Side by Side Diff: icu46/source/common/rbbi.cpp

Issue 6370014: CJK segmentation patch for ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 ***************************************************************************	2 ***************************************************************************

3 * Copyright (C) 1999-2010 International Business Machines Corporation	3 * Copyright (C) 1999-2010 International Business Machines Corporation

4 * and others. All rights reserved.	4 * and others. All rights reserved.

5 ***************************************************************************	5 ***************************************************************************

6 */	6 */

7 //	7 //

8 // file: rbbi.c Contains the implementation of the rule based break iterator	8 // file: rbbi.c Contains the implementation of the rule based break iterator

9 // runtime engine and the API implementation for	9 // runtime engine and the API implementation for

10 // class RuleBasedBreakIterator	10 // class RuleBasedBreakIterator

(...skipping 1537 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1548 // checkDictionary This function handles all processing of characters in	1548 // checkDictionary This function handles all processing of characters in

1549 // the "dictionary" set. It will determine the appropriate	1549 // the "dictionary" set. It will determine the appropriate

1550 // course of action, and possibly set up a cache in the	1550 // course of action, and possibly set up a cache in the

1551 // process.	1551 // process.

1552 //	1552 //

1553 //-------------------------------------------------------------------------------	1553 //-------------------------------------------------------------------------------

1554 int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,	1554 int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,

1555 int32_t endPos,	1555 int32_t endPos,

1556 UBool reverse) {	1556 UBool reverse) {

1557 // Reset the old break cache first.	1557 // Reset the old break cache first.

1558 uint32_t dictionaryCount = fDictionaryCharCount;

1559 reset();	1558 reset();

1560	1559

1561 if (dictionaryCount <= 1 || (endPos - startPos) <= 1) {	1560 // note: code segment below assumes that dictionary chars are in the

	1561 // startPos-endPos range

	1562 // value returned should be next character in sequence

	1563 if ((endPos - startPos) <= 1) {

1562 return (reverse ? startPos : endPos);	1564 return (reverse ? startPos : endPos);

1563 }	1565 }

1564	1566

1565 // Bug 5532. The dictionary code will crash if the input text is UTF-8	1567 // Bug 5532. The dictionary code will crash if the input text is UTF-8

1566 // because native indexes are different from UTF-16 indexes.	1568 // because native indexes are different from UTF-16 indexes.

1567 // Temporary hack: skip dictionary lookup for UTF-8 encoded text.	1569 // Temporary hack: skip dictionary lookup for UTF-8 encoded text.

1568 // It wont give the right breaks, but it's better than a crash.	1570 // It wont give the right breaks, but it's better than a crash.

1569 //	1571 //

1570 // Check the type of the UText by checking its pFuncs field, which	1572 // Check the type of the UText by checking its pFuncs field, which

1571 // is UText's function dispatch table. It will be the same for all	1573 // is UText's function dispatch table. It will be the same for all

(...skipping 132 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1704 for (int32_t i = 0; i < foundBreakCount; ++i) {	1706 for (int32_t i = 0; i < foundBreakCount; ++i) {

1705 fCachedBreakPositions[out++] = breaks.elementAti(i);	1707 fCachedBreakPositions[out++] = breaks.elementAti(i);

1706 }	1708 }

1707 if (endPos > fCachedBreakPositions[out-1]) {	1709 if (endPos > fCachedBreakPositions[out-1]) {

1708 fCachedBreakPositions[out] = endPos;	1710 fCachedBreakPositions[out] = endPos;

1709 }	1711 }

1710 // If there are breaks, then by definition, we are replacing the original	1712 // If there are breaks, then by definition, we are replacing the original

1711 // proposed break by one of the breaks we found. Use following() and	1713 // proposed break by one of the breaks we found. Use following() and

1712 // preceding() to do the work. They should never recurse in this case.	1714 // preceding() to do the work. They should never recurse in this case.

1713 if (reverse) {	1715 if (reverse) {

1714 return preceding(endPos - 1);	1716 return preceding(endPos);

1715 }	1717 }

1716 else {	1718 else {

1717 return following(startPos);	1719 return following(startPos);

1718 }	1720 }

1719 }	1721 }

1720 // If the allocation failed, just fall through to the "no breaks found" case.	1722 // If the allocation failed, just fall through to the "no breaks found" case.

1721 }	1723 }

1722	1724

1723 // If we get here, there were no language-based breaks. Set the text pointer	1725 // If we get here, there were no language-based breaks. Set the text pointer

1724 // to the original proposed break.	1726 // to the original proposed break.

(...skipping 145 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1870 }*/	1872 }*/

1871	1873

1872 void RuleBasedBreakIterator::setBreakType(int32_t type) {	1874 void RuleBasedBreakIterator::setBreakType(int32_t type) {

1873 fBreakType = type;	1875 fBreakType = type;

1874 reset();	1876 reset();

1875 }	1877 }

1876	1878

1877 U_NAMESPACE_END	1879 U_NAMESPACE_END

1878	1880

1879 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */	1881 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

OLD	NEW

« no previous file with comments | « icu46/source/common/dictbe.cpp ('k') | icu46/source/common/triedict.h » ('j') | no next file with comments »