icu46/source/i18n/rbt.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/i18n/rbt.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/i18n/rbt.cpp

===================================================================

--- icu46/source/i18n/rbt.cpp (revision 0)

+++ icu46/source/i18n/rbt.cpp (revision 0)

@@ -0,0 +1,295 @@

+/*

+**********************************************************************

+* Date Name Description

+* 11/17/99 aliu Creation.

+**********************************************************************

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_TRANSLITERATION

+#include "unicode/rep.h"

+#include "unicode/uniset.h"

+#include "rbt_pars.h"

+#include "rbt_data.h"

+#include "rbt_rule.h"

+#include "rbt.h"

+#include "umutex.h"

+U_NAMESPACE_BEGIN

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)

+static UMTX transliteratorDataMutex = NULL;

+static Replaceable *gLockedText = NULL;

+void RuleBasedTransliterator::_construct(const UnicodeString& rules,

+ UTransDirection direction,

+ UParseError& parseError,

+ UErrorCode& status) {

+ fData = 0;

+ isDataOwned = TRUE;

+ if (U_FAILURE(status)) {

+ return;

+ }

+ TransliteratorParser parser(status);

+ parser.parse(rules, direction, parseError, status);

+ if (U_FAILURE(status)) {

+ return;

+ }

+ if (parser.idBlockVector.size() != 0 ||

+ parser.compoundFilter != NULL ||

+ parser.dataVector.size() == 0) {

+ status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT

+ return;

+ }

+ fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);

+ setMaximumContextLength(fData->ruleSet.getMaximumContextLength());

+/**

+ * Constructs a new transliterator from the given rules.

+ * @param id the id for the transliterator.

+ * @param rules rules, separated by ';'

+ * @param direction either FORWARD or REVERSE.

+ * @param adoptedFilter the filter for this transliterator.

+ * @param parseError Struct to receive information on position

+ * of error if an error is encountered

+ * @param status Output param set to success/failure code.

+ * @exception IllegalArgumentException if rules are malformed

+ * or direction is invalid.

+ */

+RuleBasedTransliterator::RuleBasedTransliterator(

+ const UnicodeString& id,

+ const UnicodeString& rules,

+ UTransDirection direction,

+ UnicodeFilter* adoptedFilter,

+ UParseError& parseError,

+ UErrorCode& status) :

+ Transliterator(id, adoptedFilter) {

+ _construct(rules, direction,parseError,status);

+/**

+ * Constructs a new transliterator from the given rules.

+ * @param id the id for the transliterator.

+ * @param rules rules, separated by ';'

+ * @param direction either FORWARD or REVERSE.

+ * @param adoptedFilter the filter for this transliterator.

+ * @param status Output param set to success/failure code.

+ * @exception IllegalArgumentException if rules are malformed

+ * or direction is invalid.

+ */

+/*RuleBasedTransliterator::RuleBasedTransliterator(

+ const UnicodeString& id,

+ const UnicodeString& rules,

+ UTransDirection direction,

+ UnicodeFilter* adoptedFilter,

+ UErrorCode& status) :

+ Transliterator(id, adoptedFilter) {

+ UParseError parseError;

+ _construct(rules, direction,parseError, status);

+}*/

+/**

+ * Convenience constructor with no filter.

+ */

+/*RuleBasedTransliterator::RuleBasedTransliterator(

+ const UnicodeString& id,

+ const UnicodeString& rules,

+ UTransDirection direction,

+ UErrorCode& status) :

+ Transliterator(id, 0) {

+ UParseError parseError;

+ _construct(rules, direction,parseError, status);

+}*/

+/**

+ * Convenience constructor with no filter and FORWARD direction.

+ */

+/*RuleBasedTransliterator::RuleBasedTransliterator(

+ const UnicodeString& id,

+ const UnicodeString& rules,

+ UErrorCode& status) :

+ Transliterator(id, 0) {

+ UParseError parseError;

+ _construct(rules, UTRANS_FORWARD, parseError, status);

+}*/

+/**

+ * Convenience constructor with FORWARD direction.

+ */

+/*RuleBasedTransliterator::RuleBasedTransliterator(

+ const UnicodeString& id,

+ const UnicodeString& rules,

+ UnicodeFilter* adoptedFilter,

+ UErrorCode& status) :

+ Transliterator(id, adoptedFilter) {

+ UParseError parseError;

+ _construct(rules, UTRANS_FORWARD,parseError, status);

+}*/

+RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,

+ const TransliterationRuleData* theData,

+ UnicodeFilter* adoptedFilter) :

+ Transliterator(id, adoptedFilter),

+ fData((TransliterationRuleData*)theData), // cast away const

+ isDataOwned(FALSE) {

+ setMaximumContextLength(fData->ruleSet.getMaximumContextLength());

+/**

+ * Internal constructor.

+ */

+RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,

+ TransliterationRuleData* theData,

+ UBool isDataAdopted) :

+ Transliterator(id, 0),

+ fData(theData),

+ isDataOwned(isDataAdopted) {

+ setMaximumContextLength(fData->ruleSet.getMaximumContextLength());

+/**

+ * Copy constructor.

+ */

+RuleBasedTransliterator::RuleBasedTransliterator(

+ const RuleBasedTransliterator& other) :

+ Transliterator(other), fData(other.fData),

+ isDataOwned(other.isDataOwned) {

+ // The data object may or may not be owned. If it is not owned we

+ // share it; it is invariant. If it is owned, it's still

+ // invariant, but we need to copy it to prevent double-deletion.

+ // If this becomes a performance issue (if people do a lot of RBT

+ // copying -- unlikely) we can reference count the data object.

+ // Only do a deep copy if this is owned data, that is, data that

+ // will be later deleted. System transliterators contain

+ // non-owned data.

+ if (isDataOwned) {

+ fData = new TransliterationRuleData(*other.fData);

+ }

+/**

+ * Destructor.

+ */

+RuleBasedTransliterator::~RuleBasedTransliterator() {

+ // Delete the data object only if we own it.

+ if (isDataOwned) {

+ delete fData;

+ }

+Transliterator* // Covariant return NOT ALLOWED (for portability)

+RuleBasedTransliterator::clone(void) const {

+ return new RuleBasedTransliterator(*this);

+/**

+ * Implements {@link Transliterator#handleTransliterate}.

+ */

+void

+RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,

+ UBool isIncremental) const {

+ /* We keep contextStart and contextLimit fixed the entire time,

+ * relative to the text -- contextLimit may move numerically if

+ * text is inserted or removed. The start offset moves toward

+ * limit, with replacements happening under it.

+ *

+ * Example: rules 1. ab>x|y

+ * 2. yc>z

+ *

+ * |eabcd begin - no match, advance start

+ * e|abcd match rule 1 - change text & adjust start

+ * ex|ycd match rule 2 - change text & adjust start

+ * exz|d no match, advance start

+ * exzd| done

+ */

+ /* A rule like

+ * a>b|a

+ * creates an infinite loop. To prevent that, we put an arbitrary

+ * limit on the number of iterations that we take, one that is

+ * high enough that any reasonable rules are ok, but low enough to

+ * prevent a server from hanging. The limit is 16 times the

+ * number of characters n, unless n is so large that 16n exceeds a

+ * uint32_t.

+ */

+ uint32_t loopCount = 0;

+ uint32_t loopLimit = index.limit - index.start;

+ if (loopLimit >= 0x10000000) {

+ loopLimit = 0xFFFFFFFF;

+ } else {

+ loopLimit <<= 4;

+ }

+ // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent

+ // operations must be prevented.

+ // A Complication: compound transliterators can result in recursive entries to this

+ // function, sometimes with different "This" objects, always with the same text.

+ // Double-locking must be prevented in these cases.

+ //

+ // If the transliteration data is exclusively owned by this transliterator object,

+ // we don't need to do any locking. No sharing between transliterators is possible,

+ // so no concurrent access from multiple threads is possible.

+ UBool lockedMutexAtThisLevel = FALSE;

+ if (isDataOwned == FALSE) {

+ // Test whether this request is operating on the same text string as some

+ // other transliteration that is still in progress and holding the

+ // transliteration mutex. If so, do not lock the transliteration

+ // mutex again.

+ UBool needToLock;

+ UMTX_CHECK(NULL, (&text != gLockedText), needToLock);

+ if (needToLock) {

+ umtx_lock(&transliteratorDataMutex);

+ gLockedText = &text;

+ lockedMutexAtThisLevel = TRUE;

+ }

+ // Check to make sure we don't dereference a null pointer.

+ if (fData != NULL) {

+ while (index.start < index.limit &&

+ loopCount <= loopLimit &&

+ fData->ruleSet.transliterate(text, index, isIncremental)) {

+ ++loopCount;

+ }

+ if (lockedMutexAtThisLevel) {

+ gLockedText = NULL;

+ umtx_unlock(&transliteratorDataMutex);

+ }

+UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,

+ UBool escapeUnprintable) const {

+ return fData->ruleSet.toRules(rulesSource, escapeUnprintable);

+/**

+ * Implement Transliterator framework

+ */

+void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {

+ fData->ruleSet.getSourceTargetSet(result, FALSE);

+/**

+ * Override Transliterator framework

+ */

+UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {

+ return fData->ruleSet.getSourceTargetSet(result, TRUE);

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_TRANSLITERATION */

Property changes on: icu46/source/i18n/rbt.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/i18n/rbt.h ('k') | icu46/source/i18n/rbt_data.h » ('j') | no next file with comments »