| Index: icu46/source/i18n/unesctrn.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/unesctrn.cpp (revision 0)
|
| +++ icu46/source/i18n/unesctrn.cpp (revision 0)
|
| @@ -0,0 +1,290 @@
|
| +/*
|
| + **********************************************************************
|
| + * Copyright (c) 2001-2008, International Business Machines
|
| + * Corporation and others. All Rights Reserved.
|
| + **********************************************************************
|
| + * Date Name Description
|
| + * 11/19/2001 aliu Creation.
|
| + **********************************************************************
|
| + */
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_TRANSLITERATION
|
| +
|
| +#include "unicode/uchar.h"
|
| +#include "unesctrn.h"
|
| +#include "util.h"
|
| +
|
| +#include "cmemory.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +/**
|
| + * Sentinel value that terminates every SPEC_* / spec[] array; copySpec() and handleTransliterate() scan until they reach it.
|
| + */
|
| +static const UChar END = 0xFFFF;
|
| +
|
| +// Unicode: "U+10FFFF" hex, min=4, max=6. Entry layout used by every table below: prefixLen, suffixLen, radix, minDigits, maxDigits, prefix chars, suffix chars
|
| +static const UChar SPEC_Unicode[] = {
|
| + 2, 0, 16, 4, 6, 85/*U*/, 43/*+*/, // prefixLen=2, suffixLen=0, radix=16, min=4, max=6, prefix "U+"
|
| + END
|
| +};
|
| +
|
| +// Java: "\\uFFFF" hex, min=4, max=4
|
| +static const UChar SPEC_Java[] = {
|
| + 2, 0, 16, 4, 4, 92/*\*/, 117/*u*/,
|
| + END
|
| +};
|
| +
|
| +// C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
|
| +static const UChar SPEC_C[] = {
|
| + 2, 0, 16, 4, 4, 92/*\*/, 117/*u*/,
|
| + 2, 0, 16, 8, 8, 92/*\*/, 85/*U*/,
|
| + END
|
| +};
|
| +
|
| +// XML: "&#x10FFFF;" hex, min=1, max=6
|
| +static const UChar SPEC_XML[] = {
|
| + 3, 1, 16, 1, 6, 38/*&*/, 35/*#*/, 120/*x*/, 59/*;*/,
|
| + END
|
| +};
|
| +
|
| +// XML10: "&#1114111;" dec, min=1, max=7 (not really "Hex-Any")
|
| +static const UChar SPEC_XML10[] = {
|
| + 2, 1, 10, 1, 7, 38/*&*/, 35/*#*/, 59/*;*/,
|
| + END
|
| +};
|
| +
|
| +// Perl: "\\x{263A}" hex, min=1, max=6
|
| +static const UChar SPEC_Perl[] = {
|
| + 3, 1, 16, 1, 6, 92/*\*/, 120/*x*/, 123/*{*/, 125/*}*/,
|
| + END
|
| +};
|
| +
|
| +// All forms, in the order they are tried: Unicode, Java, C, XML, XML10, Perl
|
| +static const UChar SPEC_Any[] = {
|
| + 2, 0, 16, 4, 6, 85/*U*/, 43/*+*/, // Unicode
|
| + 2, 0, 16, 4, 4, 92/*\*/, 117/*u*/, // Java
|
| + 2, 0, 16, 8, 8, 92/*\*/, 85/*U*/, // C, 8-digit form (the 4-digit form is the Java entry above)
|
| + 3, 1, 16, 1, 6, 38/*&*/, 35/*#*/, 120/*x*/, 59/*;*/, // XML
|
| + 2, 1, 10, 1, 7, 38/*&*/, 35/*#*/, 59/*;*/, // XML10
|
| + 3, 1, 16, 1, 6, 92/*\*/, 120/*x*/, 123/*{*/, 125/*}*/, // Perl
|
| + END
|
| +};
|
| +
|
| +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnescapeTransliterator)
|
| +
|
| +static UChar* copySpec(const UChar* spec) { // Returns a uprv_malloc'ed copy of spec up to and including END, or NULL if allocation fails.
|
| + int32_t len = 0;
|
| + while (spec[len] != END) { // spec must be non-NULL and END-terminated; nothing else bounds this scan
|
| + ++len;
|
| + }
|
| + ++len; // count the END terminator too, so the copy is itself terminated
|
| + UChar *result = (UChar *)uprv_malloc(len*sizeof(UChar));
|
| + // Check for memory allocation error; on failure the copy is skipped and NULL is returned.
|
| + if (result != NULL) {
|
| + uprv_memcpy(result, spec, len*sizeof(result[0]));
|
| + }
|
| + return result; // NOTE(review): may be NULL; the constructors in this file store it without checking
|
| +}
|
| +
|
| +/**
|
| + * Factory methods, one per registered variant. Each returns a new UnescapeTransliterator built from the matching SPEC_* table; the context token is ignored.
|
| + */
|
| +static Transliterator* _createUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
|
| + return new UnescapeTransliterator(ID, SPEC_Unicode); // "U+XXXX" form
|
| +}
|
| +static Transliterator* _createJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
|
| + return new UnescapeTransliterator(ID, SPEC_Java); // 4-digit backslash-u form
|
| +}
|
| +static Transliterator* _createC(const UnicodeString& ID, Transliterator::Token /*context*/) {
|
| + return new UnescapeTransliterator(ID, SPEC_C); // 4-digit and 8-digit backslash forms
|
| +}
|
| +static Transliterator* _createXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
|
| + return new UnescapeTransliterator(ID, SPEC_XML); // hexadecimal character reference
|
| +}
|
| +static Transliterator* _createXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
|
| + return new UnescapeTransliterator(ID, SPEC_XML10); // decimal character reference
|
| +}
|
| +static Transliterator* _createPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
|
| + return new UnescapeTransliterator(ID, SPEC_Perl); // backslash-x with braces
|
| +}
|
| +static Transliterator* _createAny(const UnicodeString& ID, Transliterator::Token /*context*/) {
|
| + return new UnescapeTransliterator(ID, SPEC_Any); // every form listed in SPEC_Any
|
| +}
|
| +
|
| +/**
|
| + * Registers standard variants with the system. Called by
|
| + * Transliterator during initialization. Each ID is bound to the factory that uses the matching SPEC_* table.
|
| + */
|
| +void UnescapeTransliterator::registerIDs() {
|
| + Token t = integerToken(0); // placeholder context; every factory registered below ignores it
|
| +
|
| + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Unicode"), _createUnicode, t);
|
| +
|
| + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Java"), _createJava, t);
|
| +
|
| + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/C"), _createC, t);
|
| +
|
| + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/XML"), _createXML, t);
|
| +
|
| + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/XML10"), _createXML10, t);
|
| +
|
| + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Perl"), _createPerl, t);
|
| +
|
| + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any"), _createAny, t); // no variant suffix: accepts all forms
|
| +}
|
| +
|
| +/**
|
| + * Constructor. Takes the encoded, END-terminated spec array and stores a private copy of it.
|
| + */
|
| +UnescapeTransliterator::UnescapeTransliterator(const UnicodeString& newID,
|
| + const UChar *newSpec) :
|
| + Transliterator(newID, NULL) // NULL is the second base-class argument; presumably "no filter" - confirm against Transliterator
|
| +{
|
| + this->spec = copySpec(newSpec); // heap copy released in the destructor; NOTE(review): NULL if allocation failed, not checked here
|
| +}
|
| +
|
| +/**
|
| + * Copy constructor. Gives the new object its own copy of the other object's spec array.
|
| + */
|
| +UnescapeTransliterator::UnescapeTransliterator(const UnescapeTransliterator& o) :
|
| + Transliterator(o) {
|
| + this->spec = copySpec(o.spec); // NOTE(review): copySpec reads o.spec unconditionally, so o.spec must not be NULL
|
| +}
|
| +
|
| +UnescapeTransliterator::~UnescapeTransliterator() {
|
| + uprv_free(spec); // releases the array allocated by copySpec() in the constructors
|
| +}
|
| +
|
| +/**
|
| + * Transliterator API: returns a newly allocated copy of this object, made with the copy constructor.
|
| + */
|
| +Transliterator* UnescapeTransliterator::clone() const {
|
| + return new UnescapeTransliterator(*this);
|
| +}
|
| +
|
| +/**
|
| + * Implements {@link Transliterator#handleTransliterate}: scans text from pos.start to pos.limit, replaces each escape matching an entry of spec[] with the code point it encodes, then writes the new start/limit/contextLimit back to pos.
|
| + */
|
| +void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos,
|
| + UBool isIncremental) const {
|
| + int32_t start = pos.start;
|
| + int32_t limit = pos.limit;
|
| + int32_t i, j, ipat; // ipat indexes into spec[]; j only counts entries and is never read
|
| +
|
| + while (start < limit) {
|
| + // Loop over the forms in spec[]. Exit this loop when we
|
| + // match one of the specs. Exit the outer loop if a
|
| + // partial match is detected and isIncremental is true.
|
| + for (j=0, ipat=0; spec[ipat] != END; ++j) { // NOTE(review): spec is dereferenced unchecked; it is NULL if copySpec() failed
|
| +
|
| + // Read the header
|
| + int32_t prefixLen = spec[ipat++]; // literal chars expected before the digits
|
| + int32_t suffixLen = spec[ipat++]; // literal chars expected after the digits
|
| + int8_t radix = (int8_t) spec[ipat++]; // base handed to u_digit (10 or 16 in this file's tables)
|
| + int32_t minDigits = spec[ipat++];
|
| + int32_t maxDigits = spec[ipat++]; // ipat now points at this entry's prefix chars
|
| +
|
| + // s is a copy of start that is advanced over the
|
| + // characters as we parse them.
|
| + int32_t s = start;
|
| + UBool match = TRUE;
|
| +
|
| + for (i=0; i<prefixLen; ++i) {
|
| + if (s >= limit) { // out of input; cannot happen at i == 0 because s == start < limit there
|
| + if (i > 0) {
|
| + // We've already matched a character. This is
|
| + // a partial match, so we return if in
|
| + // incremental mode. In non-incremental mode,
|
| + // go to the next spec.
|
| + if (isIncremental) {
|
| + goto exit;
|
| + }
|
| + match = FALSE;
|
| + break;
|
| + }
|
| + }
|
| + UChar c = text.charAt(s++);
|
| + if (c != spec[ipat + i]) { // compare against the i-th prefix char of this entry
|
| + match = FALSE;
|
| + break;
|
| + }
|
| + }
|
| +
|
| + if (match) {
|
| + UChar32 u = 0; // value accumulated from the digits
|
| + int32_t digitCount = 0;
|
| + for (;;) {
|
| + if (s >= limit) {
|
| + // Check for partial match in incremental mode.
|
| + if (s > start && isIncremental) {
|
| + goto exit;
|
| + }
|
| + break;
|
| + }
|
| + UChar32 ch = text.char32At(s);
|
| + int32_t digit = u_digit(ch, radix);
|
| + if (digit < 0) { // not a digit in this radix: end of the digit run
|
| + break;
|
| + }
|
| + s += UTF_CHAR_LENGTH(ch); // step past ch by its length in code units
|
| + u = (u * radix) + digit; // NOTE(review): no range check; an 8-digit form can exceed 0x10FFFF - confirm how UnicodeString(u) handles that
|
| + if (++digitCount == maxDigits) { // stop consuming once the form's maximum is reached
|
| + break;
|
| + }
|
| + }
|
| +
|
| + match = (digitCount >= minDigits); // too few digits means this form does not match
|
| +
|
| + if (match) {
|
| + for (i=0; i<suffixLen; ++i) {
|
| + if (s >= limit) {
|
| + // Check for partial match in incremental mode.
|
| + if (s > start && isIncremental) {
|
| + goto exit;
|
| + }
|
| + match = FALSE;
|
| + break;
|
| + }
|
| + UChar c = text.charAt(s++);
|
| + if (c != spec[ipat + prefixLen + i]) { // suffix chars are stored right after the prefix chars
|
| + match = FALSE;
|
| + break;
|
| + }
|
| + }
|
| +
|
| + if (match) {
|
| + // At this point, we have a match
|
| + UnicodeString str(u);
|
| + text.handleReplaceBetween(start, s, str); // replace the whole escape [start, s) with the decoded text
|
| + limit -= s - start - str.length(); // shrink limit by the number of code units the replacement removed
|
| + // The following break statement leaves the
|
| + // loop that is traversing the forms in
|
| + // spec[]. We then parse the next input
|
| + // character.
|
| + break;
|
| + }
|
| + }
|
| + }
|
| +
|
| + ipat += prefixLen + suffixLen; // skip this entry's prefix and suffix chars to reach the next header (or END)
|
| + }
|
| +
|
| + if (start < limit) {
|
| + start += UTF_CHAR_LENGTH(text.char32At(start)); // advance one code point: past the replacement if a form matched, else past the unmatched character
|
| + }
|
| + }
|
| +
|
| + exit: // reached on normal completion and on an incremental partial match (start then still points at the partial escape)
|
| + pos.contextLimit += limit - pos.limit; // apply the same length change to contextLimit
|
| + pos.limit = limit;
|
| + pos.start = start;
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
| +
|
| +//eof
|
|
|
| Property changes on: icu46/source/i18n/unesctrn.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|