OLD | NEW |
1 /* | 1 /* |
2 *************************************************************************** | 2 *************************************************************************** |
3 * Copyright (C) 1999-2010 International Business Machines Corporation | 3 * Copyright (C) 1999-2010 International Business Machines Corporation |
4 * and others. All rights reserved. | 4 * and others. All rights reserved. |
5 *************************************************************************** | 5 *************************************************************************** |
6 */ | 6 */ |
7 // | 7 // |
8 // file: rbbi.c Contains the implementation of the rule based break iterator | 8 // file: rbbi.c Contains the implementation of the rule based break iterator |
9 // runtime engine and the API implementation for | 9 // runtime engine and the API implementation for |
10 // class RuleBasedBreakIterator | 10 // class RuleBasedBreakIterator |
(...skipping 1537 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1548 // checkDictionary This function handles all processing of characters in | 1548 // checkDictionary This function handles all processing of characters in |
1549 // the "dictionary" set. It will determine the appropriate | 1549 // the "dictionary" set. It will determine the appropriate |
1550 // course of action, and possibly set up a cache in the | 1550 // course of action, and possibly set up a cache in the |
1551 // process. | 1551 // process. |
1552 // | 1552 // |
1553 //------------------------------------------------------------------------------- | 1553 //------------------------------------------------------------------------------- |
1554 int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos, | 1554 int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos, |
1555 int32_t endPos, | 1555 int32_t endPos, |
1556 UBool reverse) { | 1556 UBool reverse) { |
1557 // Reset the old break cache first. | 1557 // Reset the old break cache first. |
1558 uint32_t dictionaryCount = fDictionaryCharCount; | |
1559 reset(); | 1558 reset(); |
1560 | 1559 |
1561 if (dictionaryCount <= 1 || (endPos - startPos) <= 1) { | 1560 // note: code segment below assumes that dictionary chars are in the |
| 1561 // startPos-endPos range |
| 1562 // value returned should be next character in sequence |
| 1563 if ((endPos - startPos) <= 1) { |
1562 return (reverse ? startPos : endPos); | 1564 return (reverse ? startPos : endPos); |
1563 } | 1565 } |
1564 | 1566 |
1565 // Bug 5532. The dictionary code will crash if the input text is UTF-8 | 1567 // Bug 5532. The dictionary code will crash if the input text is UTF-8 |
1566 // because native indexes are different from UTF-16 indexes. | 1568 // because native indexes are different from UTF-16 indexes. |
1567 // Temporary hack: skip dictionary lookup for UTF-8 encoded text. | 1569 // Temporary hack: skip dictionary lookup for UTF-8 encoded text. |
1568 // It wont give the right breaks, but it's better than a crash. | 1570 // It wont give the right breaks, but it's better than a crash. |
1569 // | 1571 // |
1570 // Check the type of the UText by checking its pFuncs field, which | 1572 // Check the type of the UText by checking its pFuncs field, which |
1571 // is UText's function dispatch table. It will be the same for all | 1573 // is UText's function dispatch table. It will be the same for all |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1704 for (int32_t i = 0; i < foundBreakCount; ++i) { | 1706 for (int32_t i = 0; i < foundBreakCount; ++i) { |
1705 fCachedBreakPositions[out++] = breaks.elementAti(i); | 1707 fCachedBreakPositions[out++] = breaks.elementAti(i); |
1706 } | 1708 } |
1707 if (endPos > fCachedBreakPositions[out-1]) { | 1709 if (endPos > fCachedBreakPositions[out-1]) { |
1708 fCachedBreakPositions[out] = endPos; | 1710 fCachedBreakPositions[out] = endPos; |
1709 } | 1711 } |
1710 // If there are breaks, then by definition, we are replacing the original | 1712 // If there are breaks, then by definition, we are replacing the original |
1711 // proposed break by one of the breaks we found. Use following() and | 1713 // proposed break by one of the breaks we found. Use following() and |
1712 // preceding() to do the work. They should never recurse in this case. | 1714 // preceding() to do the work. They should never recurse in this case. |
1713 if (reverse) { | 1715 if (reverse) { |
1714 return preceding(endPos - 1); | 1716 return preceding(endPos); |
1715 } | 1717 } |
1716 else { | 1718 else { |
1717 return following(startPos); | 1719 return following(startPos); |
1718 } | 1720 } |
1719 } | 1721 } |
1720 // If the allocation failed, just fall through to the "no breaks found" case. | 1722 // If the allocation failed, just fall through to the "no breaks found" case. |
1721 } | 1723 } |
1722 | 1724 |
1723 // If we get here, there were no language-based breaks. Set the text pointer | 1725 // If we get here, there were no language-based breaks. Set the text pointer |
1724 // to the original proposed break. | 1726 // to the original proposed break. |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1870 }*/ | 1872 }*/ |
1871 | 1873 |
1872 void RuleBasedBreakIterator::setBreakType(int32_t type) { | 1874 void RuleBasedBreakIterator::setBreakType(int32_t type) { |
1873 fBreakType = type; | 1875 fBreakType = type; |
1874 reset(); | 1876 reset(); |
1875 } | 1877 } |
1876 | 1878 |
1877 U_NAMESPACE_END | 1879 U_NAMESPACE_END |
1878 | 1880 |
1879 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1881 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |