OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2008-2010, International Business Machines | 3 * Copyright (C) 2008-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * Date Name Description | 6 * Date Name Description |
7 * 05/11/2008 Andy Heninger Port from Java | 7 * 05/11/2008 Andy Heninger Port from Java |
8 ********************************************************************** | 8 ********************************************************************** |
9 */ | 9 */ |
10 | 10 |
11 #include "unicode/utypes.h" | 11 #include "unicode/utypes.h" |
12 | 12 |
13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION | 13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION |
14 | 14 |
| 15 #include "unicode/brkiter.h" |
| 16 #include "unicode/localpointer.h" |
| 17 #include "unicode/uchar.h" |
15 #include "unicode/unifilt.h" | 18 #include "unicode/unifilt.h" |
16 #include "unicode/uchar.h" | |
17 #include "unicode/uniset.h" | 19 #include "unicode/uniset.h" |
18 #include "unicode/brkiter.h" | 20 |
19 #include "brktrans.h" | 21 #include "brktrans.h" |
20 #include "unicode/uchar.h" | |
21 #include "cmemory.h" | 22 #include "cmemory.h" |
| 23 #include "mutex.h" |
22 #include "uprops.h" | 24 #include "uprops.h" |
23 #include "uinvchar.h" | 25 #include "uinvchar.h" |
24 #include "util.h" | 26 #include "util.h" |
25 #include "uvectr32.h" | 27 #include "uvectr32.h" |
26 | 28 |
27 U_NAMESPACE_BEGIN | 29 U_NAMESPACE_BEGIN |
28 | 30 |
29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) | 31 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) |
30 | 32 |
31 static const UChar SPACE = 32; // ' ' | 33 static const UChar SPACE = 32; // ' ' |
32 | 34 |
33 | 35 |
34 /** | 36 /** |
35 * Constructs a transliterator whose insertion string defaults to a | 37 * Constructs a transliterator whose insertion string defaults to a |
36 * single space (U+0020). | 38 * single space (U+0020). |
37 */ | 39 */ |
// NOTE(review): Primary constructor. Both columns forward the ID Any-BreakInternal and
// adoptedFilter to the Transliterator base and initialize fInsertion to SPACE (U+0020).
// OLD column: sets bi to NULL and eagerly allocates the boundaries UVector32; the
//   UErrorCode filled in by the UVector32 constructor is never inspected.
// NEW column: only initializes cachedBI and cachedBoundaries to NULL and has an empty body;
//   the iterator and the vector are created on demand inside handleTransliterate.
38 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : | 40 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : |
39 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), | 41 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), |
40 fInsertion(SPACE) { | 42 cachedBI(NULL), cachedBoundaries(NULL), fInsertion(SPACE) { |
41 bi = NULL; | |
42 UErrorCode status = U_ZERO_ERROR; | |
43 boundaries = new UVector32(status); | |
44 } | 43 } |
45 | 44 |
46 | 45 |
47 /** | 46 /** |
48 * Destructor. | 47 * Destructor. |
49 */ | 48 */ |
// NOTE(review): Destructor.
// OLD column: explicitly deletes bi and boundaries and resets both pointers to NULL.
// NEW column: the body is empty. cachedBI and cachedBoundaries are used with isNull() and
//   moveFrom() in handleTransliterate, so they are presumably LocalPointer members that
//   release their objects themselves -- the declaration lives in brktrans.h, which is not
//   part of this view (TODO confirm).
50 BreakTransliterator::~BreakTransliterator() { | 49 BreakTransliterator::~BreakTransliterator() { |
51 delete bi; | |
52 bi = NULL; | |
53 delete boundaries; | |
54 boundaries = NULL; | |
55 } | 50 } |
56 | 51 |
57 /** | 52 /** |
58 * Copy constructor. | 53 * Copy constructor. |
59 */ | 54 */ |
// NOTE(review): Copy constructor. Both columns copy the Transliterator base from o and
// end up with fInsertion equal to o.fInsertion.
// OLD column: clones o.bi when it is non-NULL and allocates a fresh boundaries vector
//   (the UErrorCode from the UVector32 constructor is not checked).
// NEW column: copies nothing but fInsertion; cachedBI and cachedBoundaries start out NULL,
//   so the copy neither shares nor clones the break iterator held by o.
60 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : | 55 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : |
61 Transliterator(o) { | 56 Transliterator(o), cachedBI(NULL), cachedBoundaries(NULL), fInsertion(o.
fInsertion) { |
62 bi = NULL; | 57 } |
63 if (o.bi != NULL) { | |
64 bi = o.bi->clone(); | |
65 } | |
66 fInsertion = o.fInsertion; | |
67 UErrorCode status = U_ZERO_ERROR; | |
68 boundaries = new UVector32(status); | |
69 } | |
70 | 58 |
71 | 59 |
72 /** | 60 /** |
73 * Transliterator API. | 61 * Transliterator API. |
74 */ | 62 */ |
// NOTE(review): Returns a new heap-allocated BreakTransliterator built with the copy
// constructor; identical in the OLD and NEW columns. The result comes from a plain new
// expression, so the caller presumably owns it (TODO confirm against the Transliterator API).
75 Transliterator* BreakTransliterator::clone(void) const { | 63 Transliterator* BreakTransliterator::clone(void) const { |
76 return new BreakTransliterator(*this); | 64 return new BreakTransliterator(*this); |
77 } | 65 } |
78 | 66 |
79 /** | 67 /** |
80 * Implements {@link Transliterator#handleTransliterate}. | 68 * Implements {@link Transliterator#handleTransliterate}. |
81 */ | 69 */ |
// NOTE(review): Runs a word BreakIterator over the text, pops the collected boundary offsets
// and inserts fInsertion at each of them with handleReplaceBetween, then adjusts offsets by
// delta. The rows that decide which boundaries get pushed and that compute delta and
// lastBoundary are elided by the review tool (see the skipping marker below) and are
// deliberately not described here.
// OLD column: casts away const on this, lazily creates the member bi via getBreakIterator()
//   and reuses the member boundaries vector, with no locking around either.
// NEW column: moves the cached iterator and vector out of the object while a Mutex is held,
//   works on the two local LocalPointer objects, and moves them back under a Mutex at the end.
82 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
offsets, | 70 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
offsets, |
83 UBool isIncremental ) const
{ | 71 UBool isIncremental ) const
{ |
84 | 72 |
85 UErrorCode status = U_ZERO_ERROR; | 73 UErrorCode status = U_ZERO_ERROR; |
| 74 LocalPointer<BreakIterator> bi; |
| 75 LocalPointer<UVector32> boundaries; |
| 76 |
// NEW: take whatever is cached out of this instance while m is alive, so the rest of the
// call operates on objects held only by the local LocalPointers. Mutex m is constructed
// without arguments; which lock that selects is defined in mutex.h (not visible here).
| 77 { |
| 78 Mutex m; |
| 79 BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *
>(this); |
| 80 boundaries.moveFrom(nonConstThis->cachedBoundaries); |
| 81 bi.moveFrom(nonConstThis->cachedBI); |
| 82 } |
// NEW: nothing was cached -- create replacements. Both creation calls report into status.
| 83 if (bi.isNull()) { |
| 84 bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish
(), status)); |
| 85 } |
| 86 if (boundaries.isNull()) { |
| 87 boundaries.adoptInstead(new UVector32(status)); |
| 88 } |
| 89 |
// NEW: give up if either object is missing or status reports a failure. On this path text
// and offsets are left untouched and nothing is moved back into the cache.
| 90 if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) { |
| 91 return; |
| 92 } |
| 93 |
86 boundaries->removeAllElements(); | 94 boundaries->removeAllElements(); |
87 BreakTransliterator *nonConstThis = (BreakTransliterator *)this; | |
88 nonConstThis->getBreakIterator(); // Lazy-create it if necessary | |
89 UnicodeString sText = replaceableAsString(text); | 95 UnicodeString sText = replaceableAsString(text); |
90 bi->setText(sText); | 96 bi->setText(sText); |
91 bi->preceding(offsets.start); | 97 bi->preceding(offsets.start); |
92 | 98 |
93 // To make things much easier, we will stack the boundaries, and then in
sert at the end. | 99 // To make things much easier, we will stack the boundaries, and then in
sert at the end. |
94 // generally, we won't need too many, since we will be filtered. | 100 // generally, we won't need too many, since we will be filtered. |
95 | 101 |
96 int32_t boundary; | 102 int32_t boundary; |
97 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.l
imit; boundary = bi->next()) { | 103 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.l
imit; boundary = bi->next()) { |
98 if (boundary == 0) continue; | 104 if (boundary == 0) continue; |
(...skipping 26 matching lines...) Expand all Loading... |
125 boundary = boundaries->popi(); | 131 boundary = boundaries->popi(); |
126 text.handleReplaceBetween(boundary, boundary, fInsertion); | 132 text.handleReplaceBetween(boundary, boundary, fInsertion); |
127 } | 133 } |
128 } | 134 } |
129 | 135 |
130 // Now fix up the return values | 136 // Now fix up the return values |
131 offsets.contextLimit += delta; | 137 offsets.contextLimit += delta; |
132 offsets.limit += delta; | 138 offsets.limit += delta; |
133 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; | 139 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; |
134 | 140 |
// NEW: a cache slot is refilled only when it is still empty. If it is already occupied the
// local object is not moved and stays with the local LocalPointer until the function returns.
| 141 // Return break iterator & boundaries vector to the cache. |
| 142 { |
| 143 Mutex m; |
| 144 BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *
>(this); |
| 145 if (nonConstThis->cachedBI.isNull()) { |
| 146 nonConstThis->cachedBI.moveFrom(bi); |
| 147 } |
| 148 if (nonConstThis->cachedBoundaries.isNull()) { |
| 149 nonConstThis->cachedBoundaries.moveFrom(boundaries); |
| 150 } |
| 151 } |
| 152 |
135 // TODO: do something with U_FAILURE(status); | 153 // TODO: do something with U_FAILURE(status); |
136 // (need to look at transliterators overall, not just here.) | 154 // (need to look at transliterators overall, not just here.) |
137 } | 155 } |
138 | 156 |
139 // | 157 // |
140 // getInsertion() | 158 // getInsertion() |
141 // | 159 // |
// NOTE(review): Returns a const reference to the member fInsertion, the string that
// handleTransliterate passes to handleReplaceBetween. Identical in the OLD and NEW columns.
142 const UnicodeString &BreakTransliterator::getInsertion() const { | 160 const UnicodeString &BreakTransliterator::getInsertion() const { |
143 return fInsertion; | 161 return fInsertion; |
144 } | 162 } |
145 | 163 |
146 // | 164 // |
147 // setInsertion() | 165 // setInsertion() |
148 // | 166 // |
// NOTE(review): Replaces fInsertion with a copy of the given string. Identical in the OLD
// and NEW columns; no lock is taken here in either column.
149 void BreakTransliterator::setInsertion(const UnicodeString &insertion) { | 167 void BreakTransliterator::setInsertion(const UnicodeString &insertion) { |
150 this->fInsertion = insertion; | 168 this->fInsertion = insertion; |
151 } | 169 } |
152 | 170 |
153 // | 171 // |
154 // getBreakIterator Lazily create the break iterator if it does | |
155 // not already exist. Copied from Java, probably | |
156 // better to just create it in the constructor. | |
157 // | |
// NOTE(review): Exists only in the OLD column; the NEW column removes this function and
// creates the iterator inside handleTransliterate instead.
// Creates a word BreakIterator for the English locale the first time it is called and
// stores it in the member bi; later calls return the stored pointer. status is never
// checked, so the value returned after a failed creation depends on what
// createWordInstance hands back in that case (TODO confirm).
158 BreakIterator *BreakTransliterator::getBreakIterator() { | |
159 UErrorCode status = U_ZERO_ERROR; | |
160 if (bi == NULL) { | |
161 // Note: Thai breaking behavior is universal, it is not | |
162 // tied to the Thai locale. | |
163 bi = BreakIterator::createWordInstance(Locale::getEnglish(), status); | |
164 } | |
165 return bi; | |
166 } | |
167 | |
168 // | |
169 // replaceableAsString Hack to let break iterators work | 172 // replaceableAsString Hack to let break iterators work |
170 // on the replaceable text from transliterators. | 173 // on the replaceable text from transliterators. |
171 // In practice, the only real Replaceable type that we | 174 // In practice, the only real Replaceable type that we |
172 // will be seeing is UnicodeString, so this function | 175 // will be seeing is UnicodeString, so this function |
173 // will normally be efficient. | 176 // will normally be efficient. |
174 // | 177 // |
// NOTE(review): Returns the content of r as a UnicodeString; identical in the OLD and NEW
// columns. When the dynamic_cast shows that r is a UnicodeString, the result is assigned
// from it directly; otherwise the whole range from 0 to r.length() is copied out with
// extractBetween.
175 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { | 178 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { |
176 UnicodeString s; | 179 UnicodeString s; |
177 UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); | 180 UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); |
178 if (rs != NULL) { | 181 if (rs != NULL) { |
179 s = *rs; | 182 s = *rs; |
180 } else { | 183 } else { |
181 r.extractBetween(0, r.length(), s); | 184 r.extractBetween(0, r.length(), s); |
182 } | 185 } |
183 return s; | 186 return s; |
184 } | 187 } |
185 | 188 |
186 U_NAMESPACE_END | 189 U_NAMESPACE_END |
187 | 190 |
188 #endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */ | 191 #endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |