| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2008-2010, International Business Machines | 3 * Copyright (C) 2008-2015, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 * Date Name Description | 6 * Date Name Description |
| 7 * 05/11/2008 Andy Heninger Port from Java | 7 * 05/11/2008 Andy Heninger Port from Java |
| 8 ********************************************************************** | 8 ********************************************************************** |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
| 12 | 12 |
| 13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION | 13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION |
| 14 | 14 |
| 15 #include "unicode/brkiter.h" |
| 16 #include "unicode/localpointer.h" |
| 17 #include "unicode/uchar.h" |
| 15 #include "unicode/unifilt.h" | 18 #include "unicode/unifilt.h" |
| 16 #include "unicode/uchar.h" | |
| 17 #include "unicode/uniset.h" | 19 #include "unicode/uniset.h" |
| 18 #include "unicode/brkiter.h" | 20 |
| 19 #include "brktrans.h" | 21 #include "brktrans.h" |
| 20 #include "unicode/uchar.h" | |
| 21 #include "cmemory.h" | 22 #include "cmemory.h" |
| 23 #include "mutex.h" |
| 22 #include "uprops.h" | 24 #include "uprops.h" |
| 23 #include "uinvchar.h" | 25 #include "uinvchar.h" |
| 24 #include "util.h" | 26 #include "util.h" |
| 25 #include "uvectr32.h" | 27 #include "uvectr32.h" |
| 26 | 28 |
| 27 U_NAMESPACE_BEGIN | 29 U_NAMESPACE_BEGIN |
| 28 | 30 |
| 29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) | 31 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) |
| 30 | 32 |
| 31 static const UChar SPACE = 32; // ' ' | 33 static const UChar SPACE = 32; // ' ' |
| 32 | 34 |
| 33 | 35 |
| 34 /** | 36 /** |
| 35 * Constructs a transliterator with the default delimiters '{' and | 37 * Constructs a transliterator with the default delimiters '{' and |
| 36 * '}'. | 38 * '}'. |
| 37 */ | 39 */ |
| 38 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : | 40 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : |
| 39 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), | 41 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), |
| 40 fInsertion(SPACE) { | 42 cachedBI(NULL), cachedBoundaries(NULL), fInsertion(SPACE) { |
| 41 bi = NULL; | |
| 42 UErrorCode status = U_ZERO_ERROR; | |
| 43 boundaries = new UVector32(status); | |
| 44 } | 43 } |
| 45 | 44 |
| 46 | 45 |
| 47 /** | 46 /** |
| 48 * Destructor. | 47 * Destructor. |
| 49 */ | 48 */ |
| 50 BreakTransliterator::~BreakTransliterator() { | 49 BreakTransliterator::~BreakTransliterator() { |
| 51 delete bi; | |
| 52 bi = NULL; | |
| 53 delete boundaries; | |
| 54 boundaries = NULL; | |
| 55 } | 50 } |
| 56 | 51 |
| 57 /** | 52 /** |
| 58 * Copy constructor. | 53 * Copy constructor. |
| 59 */ | 54 */ |
| 60 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : | 55 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : |
| 61 Transliterator(o) { | 56 Transliterator(o), cachedBI(NULL), cachedBoundaries(NULL), fInsertion(o.fInsertion) { |
| 62 bi = NULL; | 57 } |
| 63 if (o.bi != NULL) { | |
| 64 bi = o.bi->clone(); | |
| 65 } | |
| 66 fInsertion = o.fInsertion; | |
| 67 UErrorCode status = U_ZERO_ERROR; | |
| 68 boundaries = new UVector32(status); | |
| 69 } | |
| 70 | 58 |
| 71 | 59 |
| 72 /** | 60 /** |
| 73 * Transliterator API. | 61 * Transliterator API. |
| 74 */ | 62 */ |
| 75 Transliterator* BreakTransliterator::clone(void) const { | 63 Transliterator* BreakTransliterator::clone(void) const { |
| 76 return new BreakTransliterator(*this); | 64 return new BreakTransliterator(*this); |
| 77 } | 65 } |
| 78 | 66 |
| 79 /** | 67 /** |
| 80 * Implements {@link Transliterator#handleTransliterate}. | 68 * Implements {@link Transliterator#handleTransliterate}. |
| 81 */ | 69 */ |
| 82 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, | 70 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, |
| 83 UBool isIncremental ) const { | 71 UBool isIncremental ) const { |
| 84 | 72 |
| 85 UErrorCode status = U_ZERO_ERROR; | 73 UErrorCode status = U_ZERO_ERROR; |
| 74 LocalPointer<BreakIterator> bi; |
| 75 LocalPointer<UVector32> boundaries; |
| 76 |
| 77 { |
| 78 Mutex m; |
| 79 BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this); |
| 80 boundaries.moveFrom(nonConstThis->cachedBoundaries); |
| 81 bi.moveFrom(nonConstThis->cachedBI); |
| 82 } |
| 83 if (bi.isNull()) { |
| 84 bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status)); |
| 85 } |
| 86 if (boundaries.isNull()) { |
| 87 boundaries.adoptInstead(new UVector32(status)); |
| 88 } |
| 89 |
| 90 if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) { |
| 91 return; |
| 92 } |
| 93 |
| 86 boundaries->removeAllElements(); | 94 boundaries->removeAllElements(); |
| 87 BreakTransliterator *nonConstThis = (BreakTransliterator *)this; | |
| 88 nonConstThis->getBreakIterator(); // Lazy-create it if necessary | |
| 89 UnicodeString sText = replaceableAsString(text); | 95 UnicodeString sText = replaceableAsString(text); |
| 90 bi->setText(sText); | 96 bi->setText(sText); |
| 91 bi->preceding(offsets.start); | 97 bi->preceding(offsets.start); |
| 92 | 98 |
| 93 // To make things much easier, we will stack the boundaries, and then insert at the end. | 99 // To make things much easier, we will stack the boundaries, and then insert at the end. |
| 94 // generally, we won't need too many, since we will be filtered. | 100 // generally, we won't need too many, since we will be filtered. |
| 95 | 101 |
| 96 int32_t boundary; | 102 int32_t boundary; |
| 97 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) { | 103 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) { |
| 98 if (boundary == 0) continue; | 104 if (boundary == 0) continue; |
| (... 26 matching lines skipped ...) |
| 125 boundary = boundaries->popi(); | 131 boundary = boundaries->popi(); |
| 126 text.handleReplaceBetween(boundary, boundary, fInsertion); | 132 text.handleReplaceBetween(boundary, boundary, fInsertion); |
| 127 } | 133 } |
| 128 } | 134 } |
| 129 | 135 |
| 130 // Now fix up the return values | 136 // Now fix up the return values |
| 131 offsets.contextLimit += delta; | 137 offsets.contextLimit += delta; |
| 132 offsets.limit += delta; | 138 offsets.limit += delta; |
| 133 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; | 139 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; |
| 134 | 140 |
| 141 // Return break iterator & boundaries vector to the cache. |
| 142 { |
| 143 Mutex m; |
| 144 BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this); |
| 145 if (nonConstThis->cachedBI.isNull()) { |
| 146 nonConstThis->cachedBI.moveFrom(bi); |
| 147 } |
| 148 if (nonConstThis->cachedBoundaries.isNull()) { |
| 149 nonConstThis->cachedBoundaries.moveFrom(boundaries); |
| 150 } |
| 151 } |
| 152 |
| 135 // TODO: do something with U_FAILURE(status); | 153 // TODO: do something with U_FAILURE(status); |
| 136 // (need to look at transliterators overall, not just here.) | 154 // (need to look at transliterators overall, not just here.) |
| 137 } | 155 } |
| 138 | 156 |
| 139 // | 157 // |
| 140 // getInsertion() | 158 // getInsertion() |
| 141 // | 159 // |
| 142 const UnicodeString &BreakTransliterator::getInsertion() const { | 160 const UnicodeString &BreakTransliterator::getInsertion() const { |
| 143 return fInsertion; | 161 return fInsertion; |
| 144 } | 162 } |
| 145 | 163 |
| 146 // | 164 // |
| 147 // setInsertion() | 165 // setInsertion() |
| 148 // | 166 // |
| 149 void BreakTransliterator::setInsertion(const UnicodeString &insertion) { | 167 void BreakTransliterator::setInsertion(const UnicodeString &insertion) { |
| 150 this->fInsertion = insertion; | 168 this->fInsertion = insertion; |
| 151 } | 169 } |
| 152 | 170 |
| 153 // | 171 // |
| 154 // getBreakIterator Lazily create the break iterator if it does | |
| 155 // not already exist. Copied from Java, probably | |
| 156 // better to just create it in the constructor. | |
| 157 // | |
| 158 BreakIterator *BreakTransliterator::getBreakIterator() { | |
| 159 UErrorCode status = U_ZERO_ERROR; | |
| 160 if (bi == NULL) { | |
| 161 // Note: Thai breaking behavior is universal, it is not | |
| 162 // tied to the Thai locale. | |
| 163 bi = BreakIterator::createWordInstance(Locale::getEnglish(), status); | |
| 164 } | |
| 165 return bi; | |
| 166 } | |
| 167 | |
| 168 // | |
| 169 // replaceableAsString Hack to let break iterators work | 172 // replaceableAsString Hack to let break iterators work |
| 170 // on the replaceable text from transliterators. | 173 // on the replaceable text from transliterators. |
| 171 // In practice, the only real Replaceable type that we | 174 // In practice, the only real Replaceable type that we |
| 172 // will be seeing is UnicodeString, so this function | 175 // will be seeing is UnicodeString, so this function |
| 173 // will normally be efficient. | 176 // will normally be efficient. |
| 174 // | 177 // |
| 175 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { | 178 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { |
| 176 UnicodeString s; | 179 UnicodeString s; |
| 177 UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); | 180 UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); |
| 178 if (rs != NULL) { | 181 if (rs != NULL) { |
| 179 s = *rs; | 182 s = *rs; |
| 180 } else { | 183 } else { |
| 181 r.extractBetween(0, r.length(), s); | 184 r.extractBetween(0, r.length(), s); |
| 182 } | 185 } |
| 183 return s; | 186 return s; |
| 184 } | 187 } |
| 185 | 188 |
| 186 U_NAMESPACE_END | 189 U_NAMESPACE_END |
| 187 | 190 |
| 188 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | 191 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
| OLD | NEW |