| OLD | NEW |
| 1 /* | 1 /* |
| 2 *************************************************************************** | 2 *************************************************************************** |
| 3 * Copyright (C) 1999-2010 International Business Machines Corporation | 3 * Copyright (C) 1999-2010 International Business Machines Corporation |
| 4 * and others. All rights reserved. | 4 * and others. All rights reserved. |
| 5 *************************************************************************** | 5 *************************************************************************** |
| 6 */ | 6 */ |
| 7 // | 7 // |
| 8 // file: rbbi.c Contains the implementation of the rule based break iterator | 8 // file: rbbi.c Contains the implementation of the rule based break iterator |
| 9 // runtime engine and the API implementation for | 9 // runtime engine and the API implementation for |
| 10 // class RuleBasedBreakIterator | 10 // class RuleBasedBreakIterator |
| (...skipping 1537 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1548 // checkDictionary This function handles all processing of characters in | 1548 // checkDictionary This function handles all processing of characters in |
| 1549 // the "dictionary" set. It will determine the appropriate | 1549 // the "dictionary" set. It will determine the appropriate |
| 1550 // course of action, and possibly set up a cache in the | 1550 // course of action, and possibly set up a cache in the |
| 1551 // process. | 1551 // process. |
| 1552 // | 1552 // |
| 1553 //------------------------------------------------------------------------------- | 1553 //------------------------------------------------------------------------------- |
| 1554 int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos, | 1554 int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos, |
| 1555 int32_t endPos, | 1555 int32_t endPos, |
| 1556 UBool reverse) { | 1556 UBool reverse) { |
| 1557 // Reset the old break cache first. | 1557 // Reset the old break cache first. |
| 1558 uint32_t dictionaryCount = fDictionaryCharCount; | |
| 1559 reset(); | 1558 reset(); |
| 1560 | 1559 |
| 1561 if (dictionaryCount <= 1 || (endPos - startPos) <= 1) { | 1560 // note: code segment below assumes that dictionary chars are in the |
| 1561 // startPos-endPos range |
| 1562 // value returned should be next character in sequence |
| 1563 if ((endPos - startPos) <= 1) { |
| 1562 return (reverse ? startPos : endPos); | 1564 return (reverse ? startPos : endPos); |
| 1563 } | 1565 } |
| 1564 | 1566 |
| 1565 // Bug 5532. The dictionary code will crash if the input text is UTF-8 | 1567 // Bug 5532. The dictionary code will crash if the input text is UTF-8 |
| 1566 // because native indexes are different from UTF-16 indexes. | 1568 // because native indexes are different from UTF-16 indexes. |
| 1567 // Temporary hack: skip dictionary lookup for UTF-8 encoded text. | 1569 // Temporary hack: skip dictionary lookup for UTF-8 encoded text. |
| 1568 // It won't give the right breaks, but it's better than a crash. | 1570 // It won't give the right breaks, but it's better than a crash. |
| 1569 // | 1571 // |
| 1570 // Check the type of the UText by checking its pFuncs field, which | 1572 // Check the type of the UText by checking its pFuncs field, which |
| 1571 // is UText's function dispatch table. It will be the same for all | 1573 // is UText's function dispatch table. It will be the same for all |
| (...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1704 for (int32_t i = 0; i < foundBreakCount; ++i) { | 1706 for (int32_t i = 0; i < foundBreakCount; ++i) { |
| 1705 fCachedBreakPositions[out++] = breaks.elementAti(i); | 1707 fCachedBreakPositions[out++] = breaks.elementAti(i); |
| 1706 } | 1708 } |
| 1707 if (endPos > fCachedBreakPositions[out-1]) { | 1709 if (endPos > fCachedBreakPositions[out-1]) { |
| 1708 fCachedBreakPositions[out] = endPos; | 1710 fCachedBreakPositions[out] = endPos; |
| 1709 } | 1711 } |
| 1710 // If there are breaks, then by definition, we are replacing the original | 1712 // If there are breaks, then by definition, we are replacing the original |
| 1711 // proposed break by one of the breaks we found. Use following() and | 1713 // proposed break by one of the breaks we found. Use following() and |
| 1712 // preceding() to do the work. They should never recurse in this case. | 1714 // preceding() to do the work. They should never recurse in this case. |
| 1713 if (reverse) { | 1715 if (reverse) { |
| 1714 return preceding(endPos - 1); | 1716 return preceding(endPos); |
| 1715 } | 1717 } |
| 1716 else { | 1718 else { |
| 1717 return following(startPos); | 1719 return following(startPos); |
| 1718 } | 1720 } |
| 1719 } | 1721 } |
| 1720 // If the allocation failed, just fall through to the "no breaks found" case. | 1722 // If the allocation failed, just fall through to the "no breaks found" case. |
| 1721 } | 1723 } |
| 1722 | 1724 |
| 1723 // If we get here, there were no language-based breaks. Set the text pointer | 1725 // If we get here, there were no language-based breaks. Set the text pointer |
| 1724 // to the original proposed break. | 1726 // to the original proposed break. |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1870 }*/ | 1872 }*/ |
| 1871 | 1873 |
| 1872 void RuleBasedBreakIterator::setBreakType(int32_t type) { | 1874 void RuleBasedBreakIterator::setBreakType(int32_t type) { |
| 1873 fBreakType = type; | 1875 fBreakType = type; |
| 1874 reset(); | 1876 reset(); |
| 1875 } | 1877 } |
| 1876 | 1878 |
| 1877 U_NAMESPACE_END | 1879 U_NAMESPACE_END |
| 1878 | 1880 |
| 1879 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1881 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
| OLD | NEW |