icu46/source/i18n/nortrans.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/i18n/nortrans.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/i18n/nortrans.cpp

===================================================================

--- icu46/source/i18n/nortrans.cpp (revision 0)

+++ icu46/source/i18n/nortrans.cpp (revision 0)

@@ -0,0 +1,175 @@

+/*

+**********************************************************************

+* Date Name Description

+* 07/03/01 aliu Creation.

+**********************************************************************

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_TRANSLITERATION

+#include "unicode/normalizer2.h"

+#include "cstring.h"

+#include "nortrans.h"

+U_NAMESPACE_BEGIN

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NormalizationTransliterator)

+/**
+ * Wraps a C string pointer in a Transliterator::Token so that it can be
+ * handed to Transliterator::_registerFactory() as the factory context.
+ *
+ * The const qualifier is cast away only to satisfy pointerToken();
+ * the factory in this file reads the string back as const char * and
+ * does not modify it.
+ */
+static inline Transliterator::Token cstrToken(const char *s) {
+    return Transliterator::pointerToken((void *)s);
+}

+/**
+ * System registration hook.
+ *
+ * Registers _create() as the factory for the IDs Any-NFC, Any-NFKC,
+ * Any-NFD, Any-NFKD, Any-FCD and Any-FCC, then registers the special
+ * inverse relationships between the normalization forms.
+ *
+ * Each factory token points at a string literal laid out as
+ * "<data name>\0<mode byte>". _create() uses the part before the NUL as
+ * the Normalizer2 data name and casts the byte after the NUL to
+ * UNormalization2Mode (see unicode/normalizer2.h for the enum values;
+ * presumably 0=COMPOSE, 1=DECOMPOSE, 2=FCD, 3=COMPOSE_CONTIGUOUS --
+ * confirm against that header).
+ */
+void NormalizationTransliterator::registerIDs() {
+    // In the Token, the byte after the NUL is the UNormalization2Mode.
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFC"),
+                                     _create, cstrToken("nfc\0\0"));
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKC"),
+                                     _create, cstrToken("nfkc\0\0"));
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFD"),
+                                     _create, cstrToken("nfc\0\1"));
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKD"),
+                                     _create, cstrToken("nfkc\0\1"));
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCD"),
+                                     _create, cstrToken("nfc\0\2"));
+    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCC"),
+                                     _create, cstrToken("nfc\0\3"));
+    // NOTE(review): the last argument is presumably the "bidirectional"
+    // flag of _registerSpecialInverse() -- confirm in translit.h.
+    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFC"),
+                                            UNICODE_STRING_SIMPLE("NFD"), TRUE);
+    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFKC"),
+                                            UNICODE_STRING_SIMPLE("NFKD"), TRUE);
+    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCC"),
+                                            UNICODE_STRING_SIMPLE("NFD"), FALSE);
+    Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCD"),
+                                            UNICODE_STRING_SIMPLE("FCD"), FALSE);
+}

+/**
+ * Factory methods
+ *
+ * Factory callback registered by registerIDs().
+ *
+ * @param ID      the transliterator ID to give the new object
+ * @param context token whose pointer refers to a string of the form
+ *                "<data name>\0<mode byte>", as built in registerIDs()
+ * @return a new NormalizationTransliterator, or NULL if
+ *         Normalizer2::getInstance() reported a failure
+ */
+Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
+                                                     Token context) {
+    const char *name = (const char *)context.pointer;
+    // uprv_strchr(name, 0) locates the terminating NUL of the data name;
+    // the byte right after it carries the normalization mode.
+    UNormalization2Mode mode = (UNormalization2Mode)uprv_strchr(name, 0)[1];
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const Normalizer2 *norm2 = Normalizer2::getInstance(NULL, name, mode, errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return new NormalizationTransliterator(ID, *norm2);
+    } else {
+        return NULL;
+    }
+}

+/**

+ * Constructs a transliterator.

+ *

+ * Forwards id and 0 to the Transliterator base constructor and

+ * initializes fNorm2 from norm2.

+ * NOTE(review): the 0 is presumably a null filter pointer, and fNorm2 is

+ * presumably held by reference (so norm2 must outlive this object) --

+ * confirm both against translit.h / nortrans.h.

+ *

+ * @param id    the ID passed on to the Transliterator base class

+ * @param norm2 the Normalizer2 used by handleTransliterate()

+ */

+NormalizationTransliterator::NormalizationTransliterator(const UnicodeString& id,

+ const Normalizer2 &norm2) :

+ Transliterator(id, 0), fNorm2(norm2) {}

+/**
+ * Destructor.
+ *
+ * Intentionally empty: this class performs no cleanup of its own.
+ * NOTE(review): presumably fNorm2 refers to an instance that is not owned
+ * by this object (it is obtained from Normalizer2::getInstance() in
+ * _create()) -- confirm against nortrans.h.
+ */
+NormalizationTransliterator::~NormalizationTransliterator() {
+}

+/**

+ * Copy constructor.

+ *

+ * Copy-constructs the Transliterator base from o and initializes fNorm2

+ * from o.fNorm2. Used by clone().

+ *

+ * @param o the transliterator to copy

+ */

+NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :

+ Transliterator(o), fNorm2(o.fNorm2) {}

+/**
+ * Transliterator API.
+ *
+ * @return a newly allocated copy of this object, created with the copy
+ *         constructor; the raw pointer is returned to the caller
+ */
+Transliterator* NormalizationTransliterator::clone(void) const {
+    return new NormalizationTransliterator(*this);
+}

+/**
+ * Implements {@link Transliterator#handleTransliterate}.
+ *
+ * Normalizes the text between offsets.start and offsets.limit one segment
+ * at a time. A segment begins at the current position, always contains at
+ * least one code point, and extends up to (not including) the next code
+ * point for which fNorm2.hasBoundaryBefore() is true, or up to limit.
+ * A segment is written back to the text only if normalization changed it.
+ *
+ * On return, offsets.start is the index up to which the text was
+ * processed, and offsets.limit / offsets.contextLimit are shifted by the
+ * net change in text length.
+ *
+ * @param text          the text to normalize in place
+ * @param offsets       in/out positions; only start, limit and
+ *                      contextLimit are read or written here
+ * @param isIncremental if TRUE, a final segment that reaches limit and
+ *                      whose last code point has no normalization boundary
+ *                      after it is left untouched, and offsets.start is
+ *                      set to the beginning of that segment
+ */
+void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
+                                                      UBool isIncremental) const {
+    // start and limit of the input range
+    int32_t start = offsets.start;
+    int32_t limit = offsets.limit;
+    if(start >= limit) {
+        return;
+    }
+
+    /*
+     * Normalize as short chunks at a time as possible even in
+     * bulk mode, so that styled text is minimally disrupted.
+     * In incremental mode, a chunk that ends with offsets.limit
+     * must not be normalized.
+     *
+     * If it was known that the input text is not styled, then
+     * a bulk mode normalization could look like this:
+
+    UnicodeString input, normalized;
+    int32_t length = limit - start;
+    _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
+    input.releaseBuffer(length);
+
+    UErrorCode status = U_ZERO_ERROR;
+    fNorm2.normalize(input, normalized, status);
+
+    text.handleReplaceBetween(start, limit, normalized);
+
+    int32_t delta = normalized.length() - length;
+    offsets.contextLimit += delta;
+    offsets.limit += delta;
+    offsets.start = limit + delta;
+
+     */
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UnicodeString segment;
+    UnicodeString normalized;
+    UChar32 c = text.char32At(start);
+    do {
+        int32_t prev = start;
+        // Skip at least one character so we make progress.
+        // c holds the character at start.
+        segment.remove();
+        do {
+            segment.append(c);
+            start += U16_LENGTH(c);
+            // The assignment to c is skipped once start == limit, so in that
+            // case c still holds the last code point of the segment.
+        } while(start < limit && !fNorm2.hasBoundaryBefore(c = text.char32At(start)));
+        if(start == limit && isIncremental && !fNorm2.hasBoundaryAfter(c)) {
+            // stop in incremental mode when we reach the input limit
+            // in case there are additional characters that could change the
+            // normalization result
+            start=prev;
+            break;
+        }
+        fNorm2.normalize(segment, normalized, errorCode);
+        if(U_FAILURE(errorCode)) {
+            // The error code is not propagated (this function returns void);
+            // the current segment is left unmodified and processing stops.
+            break;
+        }
+        if(segment != normalized) {
+            // replace the input chunk with its normalized form
+            text.handleReplaceBetween(prev, start, normalized);
+
+            // update all necessary indexes accordingly
+            int32_t delta = normalized.length() - (start - prev);
+            start += delta;
+            limit += delta;
+        }
+    } while(start < limit);
+
+    offsets.start = start;
+    // limit - offsets.limit is the net length change of the processed text.
+    offsets.contextLimit += limit - offsets.limit;
+    offsets.limit = limit;
+}

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_TRANSLITERATION */

Property changes on: icu46/source/i18n/nortrans.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/i18n/nortrans.h ('k') | icu46/source/i18n/nultrans.h » ('j') | no next file with comments »