Index: icu46/source/i18n/rbt.cpp |
=================================================================== |
--- icu46/source/i18n/rbt.cpp (revision 0) |
+++ icu46/source/i18n/rbt.cpp (revision 0) |
@@ -0,0 +1,295 @@ |
+/* |
+********************************************************************** |
+* Copyright (C) 1999-2008, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+********************************************************************** |
+* Date Name Description |
+* 11/17/99 aliu Creation. |
+********************************************************************** |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_TRANSLITERATION |
+ |
+#include "unicode/rep.h" |
+#include "unicode/uniset.h" |
+#include "rbt_pars.h" |
+#include "rbt_data.h" |
+#include "rbt_rule.h" |
+#include "rbt.h" |
+#include "umutex.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator) |
+ |
+static UMTX transliteratorDataMutex = NULL; /* locked by handleTransliterate() when the rule data is not owned */ |
+static Replaceable *gLockedText = NULL; /* text being processed while that mutex is held; NULL otherwise */ |
+ |
+void RuleBasedTransliterator::_construct(const UnicodeString& rules, |
+                                         UTransDirection direction, |
+                                         UParseError& parseError, |
+                                         UErrorCode& status) { |
+    // Start with no rule data so that every early return leaves fData NULL. |
+    isDataOwned = TRUE; |
+    fData = NULL; |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+    // Parse the rules; problems are reported through status and parseError. |
+    TransliteratorParser parser(status); |
+    parser.parse(rules, direction, parseError, status); |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+    // ::ID blocks are disallowed in RBT, and there must be rule data to adopt. |
+    const UBool hasIdContent = parser.idBlockVector.size() != 0 || parser.compoundFilter != NULL; |
+    if (hasIdContent || parser.dataVector.size() == 0) { |
+        status = U_INVALID_RBT_SYNTAX; |
+        return; |
+    } |
+    // Take the first rule data object out of the parser and size the context from it. |
+    fData = static_cast<TransliterationRuleData*>(parser.dataVector.orphanElementAt(0)); |
+    setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); |
+} |
+ |
+/** |
+ * Builds a transliterator by parsing the given rules; the parsing work |
+ * is done by _construct(). |
+ * @param id the ID for the transliterator. |
+ * @param rules the rules, separated by ';' |
+ * @param direction either FORWARD or REVERSE. |
+ * @param adoptedFilter the filter for this transliterator. |
+ * @param parseError struct to receive information on the position |
+ * of the error if one is encountered. |
+ * @param status output param set to success/failure code; malformed |
+ * rules are reported here. |
+ */ |
+RuleBasedTransliterator::RuleBasedTransliterator( |
+        const UnicodeString& id, |
+        const UnicodeString& rules, |
+        UTransDirection direction, |
+        UnicodeFilter* adoptedFilter, |
+        UParseError& parseError, |
+        UErrorCode& status) |
+    : Transliterator(id, adoptedFilter) { |
+    _construct(rules, direction, parseError, status); |
+} |
+ |
+/** |
+ * Constructs a new transliterator from the given rules. |
+ * @param id the id for the transliterator. |
+ * @param rules rules, separated by ';' |
+ * @param direction either FORWARD or REVERSE. |
+ * @param adoptedFilter the filter for this transliterator. |
+ * @param status Output param set to success/failure code. |
+ * @exception IllegalArgumentException if rules are malformed |
+ * or direction is invalid. |
+ */ |
+/*RuleBasedTransliterator::RuleBasedTransliterator( |
+ const UnicodeString& id, |
+ const UnicodeString& rules, |
+ UTransDirection direction, |
+ UnicodeFilter* adoptedFilter, |
+ UErrorCode& status) : |
+ Transliterator(id, adoptedFilter) { |
+ UParseError parseError; |
+ _construct(rules, direction,parseError, status); |
+}*/ |
+ |
+/** |
+ * Convenience constructor with no filter. |
+ */ |
+/*RuleBasedTransliterator::RuleBasedTransliterator( |
+ const UnicodeString& id, |
+ const UnicodeString& rules, |
+ UTransDirection direction, |
+ UErrorCode& status) : |
+ Transliterator(id, 0) { |
+ UParseError parseError; |
+ _construct(rules, direction,parseError, status); |
+}*/ |
+ |
+/** |
+ * Convenience constructor with no filter and FORWARD direction. |
+ */ |
+/*RuleBasedTransliterator::RuleBasedTransliterator( |
+ const UnicodeString& id, |
+ const UnicodeString& rules, |
+ UErrorCode& status) : |
+ Transliterator(id, 0) { |
+ UParseError parseError; |
+ _construct(rules, UTRANS_FORWARD, parseError, status); |
+}*/ |
+ |
+/** |
+ * Convenience constructor with FORWARD direction. |
+ */ |
+/*RuleBasedTransliterator::RuleBasedTransliterator( |
+ const UnicodeString& id, |
+ const UnicodeString& rules, |
+ UnicodeFilter* adoptedFilter, |
+ UErrorCode& status) : |
+ Transliterator(id, adoptedFilter) { |
+ UParseError parseError; |
+ _construct(rules, UTRANS_FORWARD,parseError, status); |
+}*/ |
+ |
+// Wraps rule data that this object does not own (isDataOwned is FALSE). |
+RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, |
+        const TransliterationRuleData* theData, UnicodeFilter* adoptedFilter) |
+    : Transliterator(id, adoptedFilter), |
+      fData(const_cast<TransliterationRuleData*>(theData)), // member is non-const |
+      isDataOwned(FALSE) { |
+    setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); |
+} |
+ |
+/** |
+ * Internal constructor: no filter; theData is owned iff isDataAdopted. |
+ */ |
+RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, |
+        TransliterationRuleData* theData, |
+        UBool isDataAdopted) |
+    : Transliterator(id, 0), |
+      fData(theData), |
+      isDataOwned(isDataAdopted) { |
+    setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); |
+} |
+ |
+/** |
+ * Copy constructor.  Non-owned rule data is shared with other; owned |
+ * rule data is deep-copied so each object deletes its own instance. |
+ */ |
+RuleBasedTransliterator::RuleBasedTransliterator( |
+        const RuleBasedTransliterator& other) : |
+    Transliterator(other), fData(other.fData), |
+    isDataOwned(other.isDataOwned) { |
+    // The data object may or may not be owned.  If it is not owned we |
+    // share it; it is invariant.  If it is owned, it's still |
+    // invariant, but we need to copy it to prevent double-deletion. |
+    // If this becomes a performance issue (if people do a lot of RBT |
+    // copying -- unlikely) we can reference count the data object. |
+    // System transliterators contain non-owned data and are shared. |
+    // A transliterator whose construction failed owns no data at all |
+    // (fData is NULL, see _construct).  Copying such an object must not |
+    // dereference the null pointer; the copy simply has NULL data too. |
+    if (isDataOwned && other.fData != NULL) { |
+        fData = new TransliterationRuleData(*other.fData); |
+    } |
+} |
+ |
+/** |
+ * Destructor.  Frees the rule data only when this object owns it. |
+ */ |
+RuleBasedTransliterator::~RuleBasedTransliterator() { |
+    // Data that is not owned is left untouched. |
+    if (!isDataOwned) { return; } |
+    // Owned data is released together with this object. |
+    delete fData; |
+} |
+ |
+// Copies via the copy constructor.  Covariant return NOT ALLOWED (for portability). |
+Transliterator* RuleBasedTransliterator::clone(void) const { |
+    return new RuleBasedTransliterator(*this); |
+} |
+ |
+/** |
+ * Implements {@link Transliterator#handleTransliterate} by running the rule set over text. |
+ */ |
+void |
+RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index, |
+                                             UBool isIncremental) const { |
+    /* We keep contextStart and contextLimit fixed the entire time, |
+     * relative to the text -- contextLimit may move numerically if |
+     * text is inserted or removed.  The start offset moves toward |
+     * limit, with replacements happening under it. |
+     * |
+     * Example: rules 1. ab>x|y |
+     *                2. yc>z |
+     * |
+     * |eabcd   begin - no match, advance start |
+     * e|abcd   match rule 1 - change text & adjust start |
+     * ex|ycd   match rule 2 - change text & adjust start |
+     * exz|d    no match, advance start |
+     * exzd|    done |
+     */ |
+ |
+    /* A rule like |
+     *   a>b|a |
+     * creates an infinite loop.  To prevent that, we put an arbitrary |
+     * limit on the number of iterations that we take, one that is |
+     * high enough that any reasonable rules are ok, but low enough to |
+     * prevent a server from hanging.  The limit is 16 times |
+     * n = index.limit - index.start, unless n is so large that 16n |
+     * exceeds a uint32_t; then the limit is 0xFFFFFFFF. |
+     */ |
+    uint32_t loopCount = 0; |
+    uint32_t loopLimit = index.limit - index.start; |
+    if (loopLimit >= 0x10000000) { |
+        loopLimit = 0xFFFFFFFF; |
+    } else { |
+        loopLimit <<= 4; |
+    } |
+ |
+    // Transliterator locking.  Rule-based Transliterators are not thread safe; |
+    // concurrent operations must be prevented. |
+    // A complication: compound transliterators can result in recursive entries to |
+    // this function, sometimes with different "this" objects, always with the same |
+    // text.  Double-locking must be prevented in these cases; gLockedText records |
+    // the text for which the mutex is currently held. |
+ |
+    // If the transliteration data is exclusively owned by this transliterator object, |
+    // we don't need to do any locking.  No sharing between transliterators is possible, |
+    // so no concurrent access from multiple threads is possible. |
+    UBool lockedMutexAtThisLevel = FALSE; |
+    if (isDataOwned == FALSE) { |
+        // Test whether this request is operating on the same text string as |
+        // some other transliteration that is still in progress and holding the |
+        // transliteration mutex.  If so, do not lock the transliteration |
+        // mutex again. |
+        UBool needToLock; |
+        UMTX_CHECK(NULL, (&text != gLockedText), needToLock); |
+        if (needToLock) { |
+            umtx_lock(&transliteratorDataMutex); |
+            gLockedText = &text; |
+            lockedMutexAtThisLevel = TRUE; |
+        } |
+    } |
+ |
+    // fData is NULL when _construct() failed; in that case there is nothing to apply. |
+    if (fData != NULL) { |
+        while (index.start < index.limit && |
+               loopCount <= loopLimit && |
+               fData->ruleSet.transliterate(text, index, isIncremental)) { |
+            ++loopCount; |
+        } |
+    } |
+    if (lockedMutexAtThisLevel) { |
+        gLockedText = NULL; |
+        umtx_unlock(&transliteratorDataMutex); |
+    } |
+} |
+ |
+/** Forwards to the rule set's toRules() and returns what it returns. */ |
+UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, UBool escapeUnprintable) const { |
+    return fData->ruleSet.toRules(rulesSource, escapeUnprintable); |
+} |
+ |
+/** Implement Transliterator framework: forwards to the rule set's |
+ *  getSourceTargetSet() with FALSE; its return value is ignored. */ |
+void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { |
+    TransliterationRuleData* data = fData; |
+    data->ruleSet.getSourceTargetSet(result, FALSE); |
+} |
+ |
+/** Override Transliterator framework: forwards to the rule set's |
+ *  getSourceTargetSet() with TRUE and returns the set it returns. */ |
+UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { |
+    TransliterationRuleData* data = fData; |
+    return data->ruleSet.getSourceTargetSet(result, TRUE); |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
Property changes on: icu46/source/i18n/rbt.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |