OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 1999-2008, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * Date Name Description |
| 7 * 11/17/99 aliu Creation. |
| 8 ********************************************************************** |
| 9 */ |
| 10 |
| 11 #include "unicode/utypes.h" |
| 12 |
| 13 #if !UCONFIG_NO_TRANSLITERATION |
| 14 |
| 15 #include "unicode/rep.h" |
| 16 #include "unicode/uniset.h" |
| 17 #include "rbt_pars.h" |
| 18 #include "rbt_data.h" |
| 19 #include "rbt_rule.h" |
| 20 #include "rbt.h" |
| 21 #include "umutex.h" |
| 22 |
| 23 U_NAMESPACE_BEGIN |
| 24 |
| 25 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator) |
| 26 |
// Mutex taken by handleTransliterate() whenever the rule data is not owned
// by the transliterator instance doing the work.
static UMTX transliteratorDataMutex = NULL;
// Text object being processed by the current holder of
// transliteratorDataMutex, or NULL when the mutex is not held.
// handleTransliterate() compares its own text against this pointer so that
// a recursive entry on the same text does not try to lock the mutex again.
static Replaceable *gLockedText = NULL;
| 29 |
| 30 void RuleBasedTransliterator::_construct(const UnicodeString& rules, |
| 31 UTransDirection direction, |
| 32 UParseError& parseError, |
| 33 UErrorCode& status) { |
| 34 fData = 0; |
| 35 isDataOwned = TRUE; |
| 36 if (U_FAILURE(status)) { |
| 37 return; |
| 38 } |
| 39 |
| 40 TransliteratorParser parser(status); |
| 41 parser.parse(rules, direction, parseError, status); |
| 42 if (U_FAILURE(status)) { |
| 43 return; |
| 44 } |
| 45 |
| 46 if (parser.idBlockVector.size() != 0 || |
| 47 parser.compoundFilter != NULL || |
| 48 parser.dataVector.size() == 0) { |
| 49 status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT |
| 50 return; |
| 51 } |
| 52 |
| 53 fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); |
| 54 setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); |
| 55 } |
| 56 |
| 57 /** |
| 58 * Constructs a new transliterator from the given rules. |
| 59 * @param id the id for the transliterator. |
| 60 * @param rules rules, separated by ';' |
| 61 * @param direction either FORWARD or REVERSE. |
| 62 * @param adoptedFilter the filter for this transliterator. |
| 63 * @param parseError Struct to recieve information on position |
| 64 * of error if an error is encountered |
| 65 * @param status Output param set to success/failure code. |
| 66 * @exception IllegalArgumentException if rules are malformed |
| 67 * or direction is invalid. |
| 68 */ |
| 69 RuleBasedTransliterator::RuleBasedTransliterator( |
| 70 const UnicodeString& id, |
| 71 const UnicodeString& rules, |
| 72 UTransDirection direction, |
| 73 UnicodeFilter* adoptedFilter, |
| 74 UParseError& parseError, |
| 75 UErrorCode& status) : |
| 76 Transliterator(id, adoptedFilter) { |
| 77 _construct(rules, direction,parseError,status); |
| 78 } |
| 79 |
| 80 /** |
| 81 * Constructs a new transliterator from the given rules. |
| 82 * @param id the id for the transliterator. |
| 83 * @param rules rules, separated by ';' |
| 84 * @param direction either FORWARD or REVERSE. |
| 85 * @param adoptedFilter the filter for this transliterator. |
| 86 * @param status Output param set to success/failure code. |
| 87 * @exception IllegalArgumentException if rules are malformed |
| 88 * or direction is invalid. |
| 89 */ |
| 90 /*RuleBasedTransliterator::RuleBasedTransliterator( |
| 91 const UnicodeString& id, |
| 92 const UnicodeString& rules, |
| 93 UTransDirection direction, |
| 94 UnicodeFilter* adoptedFilter, |
| 95 UErrorCode& status) : |
| 96 Transliterator(id, adoptedFilter) { |
| 97 UParseError parseError; |
| 98 _construct(rules, direction,parseError, status); |
| 99 }*/ |
| 100 |
| 101 /** |
| 102 * Convenience constructor with no filter. |
| 103 */ |
| 104 /*RuleBasedTransliterator::RuleBasedTransliterator( |
| 105 const UnicodeString& id, |
| 106 const UnicodeString& rules, |
| 107 UTransDirection direction, |
| 108 UErrorCode& status) : |
| 109 Transliterator(id, 0) { |
| 110 UParseError parseError; |
| 111 _construct(rules, direction,parseError, status); |
| 112 }*/ |
| 113 |
| 114 /** |
| 115 * Convenience constructor with no filter and FORWARD direction. |
| 116 */ |
| 117 /*RuleBasedTransliterator::RuleBasedTransliterator( |
| 118 const UnicodeString& id, |
| 119 const UnicodeString& rules, |
| 120 UErrorCode& status) : |
| 121 Transliterator(id, 0) { |
| 122 UParseError parseError; |
| 123 _construct(rules, UTRANS_FORWARD, parseError, status); |
| 124 }*/ |
| 125 |
| 126 /** |
| 127 * Convenience constructor with FORWARD direction. |
| 128 */ |
| 129 /*RuleBasedTransliterator::RuleBasedTransliterator( |
| 130 const UnicodeString& id, |
| 131 const UnicodeString& rules, |
| 132 UnicodeFilter* adoptedFilter, |
| 133 UErrorCode& status) : |
| 134 Transliterator(id, adoptedFilter) { |
| 135 UParseError parseError; |
| 136 _construct(rules, UTRANS_FORWARD,parseError, status); |
| 137 }*/ |
| 138 |
| 139 RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, |
| 140 const TransliterationRuleData* theData, |
| 141 UnicodeFilter* adoptedFilter) : |
| 142 Transliterator(id, adoptedFilter), |
| 143 fData((TransliterationRuleData*)theData), // cast away const |
| 144 isDataOwned(FALSE) { |
| 145 setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); |
| 146 } |
| 147 |
| 148 /** |
| 149 * Internal constructor. |
| 150 */ |
| 151 RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, |
| 152 TransliterationRuleData* theDat
a, |
| 153 UBool isDataAdopted) : |
| 154 Transliterator(id, 0), |
| 155 fData(theData), |
| 156 isDataOwned(isDataAdopted) { |
| 157 setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); |
| 158 } |
| 159 |
| 160 /** |
| 161 * Copy constructor. |
| 162 */ |
| 163 RuleBasedTransliterator::RuleBasedTransliterator( |
| 164 const RuleBasedTransliterator& other) : |
| 165 Transliterator(other), fData(other.fData), |
| 166 isDataOwned(other.isDataOwned) { |
| 167 |
| 168 // The data object may or may not be owned. If it is not owned we |
| 169 // share it; it is invariant. If it is owned, it's still |
| 170 // invariant, but we need to copy it to prevent double-deletion. |
| 171 // If this becomes a performance issue (if people do a lot of RBT |
| 172 // copying -- unlikely) we can reference count the data object. |
| 173 |
| 174 // Only do a deep copy if this is owned data, that is, data that |
| 175 // will be later deleted. System transliterators contain |
| 176 // non-owned data. |
| 177 if (isDataOwned) { |
| 178 fData = new TransliterationRuleData(*other.fData); |
| 179 } |
| 180 } |
| 181 |
| 182 /** |
| 183 * Destructor. |
| 184 */ |
| 185 RuleBasedTransliterator::~RuleBasedTransliterator() { |
| 186 // Delete the data object only if we own it. |
| 187 if (isDataOwned) { |
| 188 delete fData; |
| 189 } |
| 190 } |
| 191 |
| 192 Transliterator* // Covariant return NOT ALLOWED (for portability) |
| 193 RuleBasedTransliterator::clone(void) const { |
| 194 return new RuleBasedTransliterator(*this); |
| 195 } |
| 196 |
| 197 /** |
| 198 * Implements {@link Transliterator#handleTransliterate}. |
| 199 */ |
| 200 void |
| 201 RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
index, |
| 202 UBool isIncremental) const { |
| 203 /* We keep contextStart and contextLimit fixed the entire time, |
| 204 * relative to the text -- contextLimit may move numerically if |
| 205 * text is inserted or removed. The start offset moves toward |
| 206 * limit, with replacements happening under it. |
| 207 * |
| 208 * Example: rules 1. ab>x|y |
| 209 * 2. yc>z |
| 210 * |
| 211 * |eabcd begin - no match, advance start |
| 212 * e|abcd match rule 1 - change text & adjust start |
| 213 * ex|ycd match rule 2 - change text & adjust start |
| 214 * exz|d no match, advance start |
| 215 * exzd| done |
| 216 */ |
| 217 |
| 218 /* A rule like |
| 219 * a>b|a |
| 220 * creates an infinite loop. To prevent that, we put an arbitrary |
| 221 * limit on the number of iterations that we take, one that is |
| 222 * high enough that any reasonable rules are ok, but low enough to |
| 223 * prevent a server from hanging. The limit is 16 times the |
| 224 * number of characters n, unless n is so large that 16n exceeds a |
| 225 * uint32_t. |
| 226 */ |
| 227 uint32_t loopCount = 0; |
| 228 uint32_t loopLimit = index.limit - index.start; |
| 229 if (loopLimit >= 0x10000000) { |
| 230 loopLimit = 0xFFFFFFFF; |
| 231 } else { |
| 232 loopLimit <<= 4; |
| 233 } |
| 234 |
| 235 // Transliterator locking. Rule-based Transliterators are not thread safe;
concurrent |
| 236 // operations must be prevented. |
| 237 // A Complication: compound transliterators can result in recursive entries
to this |
| 238 // function, sometimes with different "This" objects, always with the same
text. |
| 239 // Double-locking must be prevented in these cases. |
| 240 // |
| 241 |
| 242 // If the transliteration data is exclusively owned by this transliterator o
bject, |
| 243 // we don't need to do any locking. No sharing between transliterators is
possible, |
| 244 // so no concurrent access from multiple threads is possible. |
| 245 UBool lockedMutexAtThisLevel = FALSE; |
| 246 if (isDataOwned == FALSE) { |
| 247 // Test whether this request is operating on the same text string as som
e |
| 248 // some other transliteration that is still in progress and holding th
e |
| 249 // transliteration mutex. If so, do not lock the transliteration |
| 250 // mutex again. |
| 251 UBool needToLock; |
| 252 UMTX_CHECK(NULL, (&text != gLockedText), needToLock); |
| 253 if (needToLock) { |
| 254 umtx_lock(&transliteratorDataMutex); |
| 255 gLockedText = &text; |
| 256 lockedMutexAtThisLevel = TRUE; |
| 257 } |
| 258 } |
| 259 |
| 260 // Check to make sure we don't dereference a null pointer. |
| 261 if (fData != NULL) { |
| 262 while (index.start < index.limit && |
| 263 loopCount <= loopLimit && |
| 264 fData->ruleSet.transliterate(text, index, isIncremental)) { |
| 265 ++loopCount; |
| 266 } |
| 267 } |
| 268 if (lockedMutexAtThisLevel) { |
| 269 gLockedText = NULL; |
| 270 umtx_unlock(&transliteratorDataMutex); |
| 271 } |
| 272 } |
| 273 |
| 274 UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, |
| 275 UBool escapeUnprintable) const { |
| 276 return fData->ruleSet.toRules(rulesSource, escapeUnprintable); |
| 277 } |
| 278 |
| 279 /** |
| 280 * Implement Transliterator framework |
| 281 */ |
| 282 void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { |
| 283 fData->ruleSet.getSourceTargetSet(result, FALSE); |
| 284 } |
| 285 |
| 286 /** |
| 287 * Override Transliterator framework |
| 288 */ |
| 289 UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { |
| 290 return fData->ruleSet.getSourceTargetSet(result, TRUE); |
| 291 } |
| 292 |
| 293 U_NAMESPACE_END |
| 294 |
| 295 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
OLD | NEW |