Index: icu46/source/i18n/anytrans.cpp |
=================================================================== |
--- icu46/source/i18n/anytrans.cpp (revision 0) |
+++ icu46/source/i18n/anytrans.cpp (revision 0) |
@@ -0,0 +1,386 @@ |
+/* |
+***************************************************************** |
+* Copyright (c) 2002-2008, International Business Machines Corporation |
+* and others. All Rights Reserved. |
+***************************************************************** |
+* Date Name Description |
+* 06/06/2002 aliu Creation. |
+***************************************************************** |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_TRANSLITERATION |
+ |
+#include "unicode/uobject.h" |
+#include "unicode/uscript.h" |
+#include "nultrans.h" |
+#include "anytrans.h" |
+#include "uvector.h" |
+#include "tridpars.h" |
+#include "hash.h" |
+#include "putilimp.h" |
+#include "uinvchar.h" |
+ |
+//------------------------------------------------------------ |
+// Constants |
+ |
+static const UChar TARGET_SEP = 45; // '-' |
+static const UChar VARIANT_SEP = 47; // '/' |
+static const UChar ANY[] = {65,110,121,0}; // "Any" |
+static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null" |
+static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-" |
+ |
+//------------------------------------------------------------ |
+ |
+U_CDECL_BEGIN |
+/** |
+ * Deleter function for Transliterator*. |
+ */ |
+static void U_CALLCONV |
+_deleteTransliterator(void *obj) { |
+ delete (U_NAMESPACE_QUALIFIER Transliterator*) obj; |
+} |
+U_CDECL_END |
+ |
+//------------------------------------------------------------ |
+ |
+U_NAMESPACE_BEGIN |
+ |
+//------------------------------------------------------------ |
+// ScriptRunIterator |
+ |
+/** |
+ * Returns a series of ranges corresponding to scripts. They will be |
+ * of the form: |
+ * |
+ * ccccSScSSccccTTcTcccc - c = common, S = first script, T = second |
+ * | | - first run (start, limit) |
+ * | | - second run (start, limit) |
+ * |
+ * That is, the runs will overlap. The reason for this is so that a |
+ * transliterator can consider common characters both before and after |
+ * the scripts. |
+ */ |
+class ScriptRunIterator : public UMemory { |
+private: |
+ const Replaceable& text; |
+ int32_t textStart; |
+ int32_t textLimit; |
+ |
+public: |
+ /** |
+ * The code of the current run, valid after next() returns. May |
+ * be USCRIPT_INVALID_CODE if and only if the entire text is |
+ * COMMON/INHERITED. |
+ */ |
+ UScriptCode scriptCode; |
+ |
+ /** |
+ * The start of the run, inclusive, valid after next() returns. |
+ */ |
+ int32_t start; |
+ |
+ /** |
+ * The end of the run, exclusive, valid after next() returns. |
+ */ |
+ int32_t limit; |
+ |
+ /** |
+ * Constructs a run iterator over the given text from start |
+ * (inclusive) to limit (exclusive). |
+ */ |
+ ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit); |
+ |
+ /** |
+ * Returns TRUE if there are any more runs. TRUE is always |
+ * returned at least once. Upon return, the caller should |
+ * examine scriptCode, start, and limit. |
+ */ |
+ UBool next(); |
+ |
+ /** |
+ * Adjusts internal indices for a change in the limit index of the |
+ * given delta. A positive delta means the limit has increased. |
+ */ |
+ void adjustLimit(int32_t delta); |
+ |
+private: |
+ ScriptRunIterator(const ScriptRunIterator &other); // forbid copying of this class |
+ ScriptRunIterator &operator=(const ScriptRunIterator &other); // forbid copying of this class |
+}; |
+ |
+ScriptRunIterator::ScriptRunIterator(const Replaceable& theText, |
+ int32_t myStart, int32_t myLimit) : |
+ text(theText) |
+{ |
+ textStart = myStart; |
+ textLimit = myLimit; |
+ limit = myStart; |
+} |
+ |
+UBool ScriptRunIterator::next() { |
+ UChar32 ch; |
+ UScriptCode s; |
+ UErrorCode ec = U_ZERO_ERROR; |
+ |
+ scriptCode = USCRIPT_INVALID_CODE; // don't know script yet |
+ start = limit; |
+ |
+ // Are we done? |
+ if (start == textLimit) { |
+ return FALSE; |
+ } |
+ |
+ // Move start back to include adjacent COMMON or INHERITED |
+ // characters |
+ while (start > textStart) { |
+ ch = text.char32At(start - 1); // look back |
+ s = uscript_getScript(ch, &ec); |
+ if (s == USCRIPT_COMMON || s == USCRIPT_INHERITED) { |
+ --start; |
+ } else { |
+ break; |
+ } |
+ } |
+ |
+ // Move limit ahead to include COMMON, INHERITED, and characters |
+ // of the current script. |
+ while (limit < textLimit) { |
+ ch = text.char32At(limit); // look ahead |
+ s = uscript_getScript(ch, &ec); |
+ if (s != USCRIPT_COMMON && s != USCRIPT_INHERITED) { |
+ if (scriptCode == USCRIPT_INVALID_CODE) { |
+ scriptCode = s; |
+ } else if (s != scriptCode) { |
+ break; |
+ } |
+ } |
+ ++limit; |
+ } |
+ |
+ // Return TRUE even if the entire text is COMMON / INHERITED, in |
+ // which case scriptCode will be USCRIPT_INVALID_CODE. |
+ return TRUE; |
+} |
+ |
+void ScriptRunIterator::adjustLimit(int32_t delta) { |
+ limit += delta; |
+ textLimit += delta; |
+} |
+ |
+//------------------------------------------------------------ |
+// AnyTransliterator |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator) |
+ |
+AnyTransliterator::AnyTransliterator(const UnicodeString& id, |
+ const UnicodeString& theTarget, |
+ const UnicodeString& theVariant, |
+ UScriptCode theTargetScript, |
+ UErrorCode& ec) : |
+ Transliterator(id, NULL), |
+ targetScript(theTargetScript) |
+{ |
+ cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); |
+ if (U_FAILURE(ec)) { |
+ return; |
+ } |
+ uhash_setValueDeleter(cache, _deleteTransliterator); |
+ |
+ target = theTarget; |
+ if (theVariant.length() > 0) { |
+ target.append(VARIANT_SEP).append(theVariant); |
+ } |
+} |
+ |
+AnyTransliterator::~AnyTransliterator() { |
+ uhash_close(cache); |
+} |
+ |
+/** |
+ * Copy constructor. |
+ */ |
+AnyTransliterator::AnyTransliterator(const AnyTransliterator& o) : |
+ Transliterator(o), |
+ target(o.target), |
+ targetScript(o.targetScript) |
+{ |
+ // Don't copy the cache contents |
+ UErrorCode ec = U_ZERO_ERROR; |
+ cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); |
+ if (U_FAILURE(ec)) { |
+ return; |
+ } |
+ uhash_setValueDeleter(cache, _deleteTransliterator); |
+} |
+ |
+/** |
+ * Transliterator API. |
+ */ |
+Transliterator* AnyTransliterator::clone() const { |
+ return new AnyTransliterator(*this); |
+} |
+ |
+/** |
+ * Implements {@link Transliterator#handleTransliterate}. |
+ */ |
+void AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos, |
+ UBool isIncremental) const { |
+ int32_t allStart = pos.start; |
+ int32_t allLimit = pos.limit; |
+ |
+ ScriptRunIterator it(text, pos.contextStart, pos.contextLimit); |
+ |
+ while (it.next()) { |
+ // Ignore runs in the ante context |
+ if (it.limit <= allStart) continue; |
+ |
+ // Try to instantiate transliterator from it.scriptCode to |
+ // our target or target/variant |
+ Transliterator* t = getTransliterator(it.scriptCode); |
+ |
+ if (t == NULL) { |
+ // We have no transliterator. Do nothing, but keep |
+ // pos.start up to date. |
+ pos.start = it.limit; |
+ continue; |
+ } |
+ |
+ // If the run end is before the transliteration limit, do |
+ // a non-incremental transliteration. Otherwise do an |
+ // incremental one. |
+ UBool incremental = isIncremental && (it.limit >= allLimit); |
+ |
+ pos.start = uprv_max(allStart, it.start); |
+ pos.limit = uprv_min(allLimit, it.limit); |
+ int32_t limit = pos.limit; |
+ t->filteredTransliterate(text, pos, incremental); |
+ int32_t delta = pos.limit - limit; |
+ allLimit += delta; |
+ it.adjustLimit(delta); |
+ |
+ // We're done if we enter the post context |
+ if (it.limit >= allLimit) break; |
+ } |
+ |
+ // Restore limit. pos.start is fine where the last transliterator |
+ // left it, or at the end of the last run. |
+ pos.limit = allLimit; |
+} |
+ |
+Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { |
+ |
+ if (source == targetScript || source == USCRIPT_INVALID_CODE) { |
+ return NULL; |
+ } |
+ |
+ Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source); |
+ if (t == NULL) { |
+ UErrorCode ec = U_ZERO_ERROR; |
+ UnicodeString sourceName(uscript_getName(source), -1, US_INV); |
+ UnicodeString id(sourceName); |
+ id.append(TARGET_SEP).append(target); |
+ |
+ t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); |
+ if (U_FAILURE(ec) || t == NULL) { |
+ delete t; |
+ |
+ // Try to pivot around Latin, our most common script |
+ id = sourceName; |
+ id.append(LATIN_PIVOT).append(target); |
+ t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); |
+ if (U_FAILURE(ec) || t == NULL) { |
+ delete t; |
+ t = NULL; |
+ } |
+ } |
+ |
+ if (t != NULL) { |
+ uhash_iput(cache, (int32_t) source, t, &ec); |
+ } |
+ } |
+ |
+ return t; |
+} |
+ |
+/** |
+ * Return the script code for a given name, or -1 if not found. |
+ */ |
+static UScriptCode scriptNameToCode(const UnicodeString& name) { |
+ char buf[128]; |
+ UScriptCode code; |
+ UErrorCode ec = U_ZERO_ERROR; |
+ int32_t nameLen = name.length(); |
+ UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen); |
+ |
+ if (isInvariant) { |
+ name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV); |
+ buf[127] = 0; // Make sure that we NULL terminate the string. |
+ } |
+ if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec)) |
+ { |
+ code = USCRIPT_INVALID_CODE; |
+ } |
+ return code; |
+} |
+ |
+/** |
+ * Registers standard transliterators with the system. Called by |
+ * Transliterator during initialization. Scan all current targets and |
+ * register those that are scripts T as Any-T/V. |
+ */ |
+void AnyTransliterator::registerIDs() { |
+ |
+ UErrorCode ec = U_ZERO_ERROR; |
+ Hashtable seen(TRUE, ec); |
+ |
+ int32_t sourceCount = Transliterator::_countAvailableSources(); |
+ for (int32_t s=0; s<sourceCount; ++s) { |
+ UnicodeString source; |
+ Transliterator::_getAvailableSource(s, source); |
+ |
+ // Ignore the "Any" source |
+ if (source.caseCompare(ANY, 0 /*U_FOLD_CASE_DEFAULT*/) == 0) continue; |
+ |
+ int32_t targetCount = Transliterator::_countAvailableTargets(source); |
+ for (int32_t t=0; t<targetCount; ++t) { |
+ UnicodeString target; |
+ Transliterator::_getAvailableTarget(t, source, target); |
+ |
+ // Only process each target once |
+ if (seen.geti(target) != 0) continue; |
+ ec = U_ZERO_ERROR; |
+ seen.puti(target, 1, ec); |
+ |
+ // Get the script code for the target. If not a script, ignore. |
+ UScriptCode targetScript = scriptNameToCode(target); |
+ if (targetScript == USCRIPT_INVALID_CODE) continue; |
+ |
+ int32_t variantCount = Transliterator::_countAvailableVariants(source, target); |
+ // assert(variantCount >= 1); |
+ for (int32_t v=0; v<variantCount; ++v) { |
+ UnicodeString variant; |
+ Transliterator::_getAvailableVariant(v, source, target, variant); |
+ |
+ UnicodeString id; |
+ TransliteratorIDParser::STVtoID(ANY, target, variant, id); |
+ ec = U_ZERO_ERROR; |
+ AnyTransliterator* t = new AnyTransliterator(id, target, variant, |
+ targetScript, ec); |
+ if (U_FAILURE(ec)) { |
+ delete t; |
+ } else { |
+ Transliterator::_registerInstance(t); |
+ Transliterator::_registerSpecialInverse(target, NULL_ID, FALSE); |
+ } |
+ } |
+ } |
+ } |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
+ |
+//eof |
Property changes on: icu46/source/i18n/anytrans.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |