icu46/source/i18n/unesctrn.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/i18n/unesctrn.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/i18n/unesctrn.cpp

===================================================================

--- icu46/source/i18n/unesctrn.cpp (revision 0)

+++ icu46/source/i18n/unesctrn.cpp (revision 0)

@@ -0,0 +1,290 @@

+/*

+ **********************************************************************

+ * Date Name Description

+ * 11/19/2001 aliu Creation.

+ **********************************************************************

+ */

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_TRANSLITERATION

+#include "unicode/uchar.h"

+#include "unesctrn.h"

+#include "util.h"

+#include "cmemory.h"

+U_NAMESPACE_BEGIN

+/**

+ * Special character marking the end of the spec[] array.

+ * The loops in this file that walk a spec array (when copying it and

+ * when matching against it) stop as soon as they read this value.

+ */

+static const UChar END = 0xFFFF;

+// Unicode: "U+10FFFF" hex, min=4, max=6

+static const UChar SPEC_Unicode[] = {

+ 2, 0, 16, 4, 6, 85/*U*/, 43/*+*/, // header: prefixLen=2, suffixLen=0, radix=16, minDigits=4, maxDigits=6; then the prefix chars "U+"

+ END

+};

+// Java: "\\uFFFF" hex, min=4, max=4

+static const UChar SPEC_Java[] = {

+ 2, 0, 16, 4, 4, 92/*\*/, 117/*u*/, // header: prefixLen=2, suffixLen=0, radix=16, minDigits=4, maxDigits=4; then the prefix chars (backslash, 'u')

+ END

+};

+// C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8

+static const UChar SPEC_C[] = {

+ 2, 0, 16, 4, 4, 92/*\*/, 117/*u*/, // 4-digit form: prefixLen=2, suffixLen=0, radix=16, minDigits=4, maxDigits=4; prefix (backslash, 'u')

+ 2, 0, 16, 8, 8, 92/*\*/, 85/*U*/, // 8-digit form: prefixLen=2, suffixLen=0, radix=16, minDigits=8, maxDigits=8; prefix (backslash, 'U')

+ END

+};

+// XML: "&#x10FFFF;" hex, min=1, max=6

+static const UChar SPEC_XML[] = {

+ 3, 1, 16, 1, 6, 38/*&*/, 35/*#*/, 120/*x*/, 59/*;*/, // header: prefixLen=3, suffixLen=1, radix=16, minDigits=1, maxDigits=6; prefix "&#x", then suffix ";"

+ END

+};

+// XML10: "&#1114111;" dec, min=1, max=7 (not really "Hex-Any")

+static const UChar SPEC_XML10[] = {

+ 2, 1, 10, 1, 7, 38/*&*/, 35/*#*/, 59/*;*/, // header: prefixLen=2, suffixLen=1, radix=10, minDigits=1, maxDigits=7; prefix "&#", then suffix ";"

+ END

+};

+// Perl: "\\x{263A}" hex, min=1, max=6

+static const UChar SPEC_Perl[] = {

+ 3, 1, 16, 1, 6, 92/*\*/, 120/*x*/, 123/*{*/, 125/*}*/, // header: prefixLen=3, suffixLen=1, radix=16, minDigits=1, maxDigits=6; prefix (backslash, 'x', '{'), then suffix '}'

+ END

+};

+// All: Java, C, Perl, XML, XML10, Unicode. The forms are tried in the array order below (not the order listed here); the first form that matches is used.

+static const UChar SPEC_Any[] = {

+ 2, 0, 16, 4, 6, 85/*U*/, 43/*+*/, // Unicode

+ 2, 0, 16, 4, 4, 92/*\*/, 117/*u*/, // Java

+ 2, 0, 16, 8, 8, 92/*\*/, 85/*U*/, // C (surrogates)

+ 3, 1, 16, 1, 6, 38/*&*/, 35/*#*/, 120/*x*/, 59/*;*/, // XML

+ 2, 1, 10, 1, 7, 38/*&*/, 35/*#*/, 59/*;*/, // XML10

+ 3, 1, 16, 1, 6, 92/*\*/, 120/*x*/, 123/*{*/, 125/*}*/, // Perl

+ END

+};

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnescapeTransliterator)

+static UChar* copySpec(const UChar* spec) { // Returns a uprv_malloc'ed copy of spec (including its END terminator), or NULL if allocation fails. spec itself is dereferenced unconditionally, so it must not be NULL.

+ int32_t len = 0;

+ while (spec[len] != END) { // Scan forward to the END terminator.

+ ++len;

+ }

+ ++len; // Count the END terminator too, so that it is part of the copy.

+ UChar *result = (UChar *)uprv_malloc(len*sizeof(UChar));

+ // Check for memory allocation error. On failure nothing is copied and NULL is returned.

+ if (result != NULL) {

+ uprv_memcpy(result, spec, len*sizeof(result[0]));

+ }

+ return result;

+/**

+ * Factory methods. Ignore the context.

+ * Each one returns a newly allocated UnescapeTransliterator constructed

+ * from the given ID and the SPEC_* table of its variant. They are the

+ * functions handed to _registerFactory() in registerIDs().

+ */

+static Transliterator* _createUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {

+ return new UnescapeTransliterator(ID, SPEC_Unicode);

+static Transliterator* _createJava(const UnicodeString& ID, Transliterator::Token /*context*/) {

+ return new UnescapeTransliterator(ID, SPEC_Java);

+static Transliterator* _createC(const UnicodeString& ID, Transliterator::Token /*context*/) {

+ return new UnescapeTransliterator(ID, SPEC_C);

+static Transliterator* _createXML(const UnicodeString& ID, Transliterator::Token /*context*/) {

+ return new UnescapeTransliterator(ID, SPEC_XML);

+static Transliterator* _createXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {

+ return new UnescapeTransliterator(ID, SPEC_XML10);

+static Transliterator* _createPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {

+ return new UnescapeTransliterator(ID, SPEC_Perl);

+static Transliterator* _createAny(const UnicodeString& ID, Transliterator::Token /*context*/) {

+ return new UnescapeTransliterator(ID, SPEC_Any);

+/**

+ * Registers standard variants with the system. Called by

+ * Transliterator during initialization.

+ * Each "Hex-Any/<variant>" ID is registered with the factory function

+ * for that variant, and the plain "Hex-Any" ID with the factory that

+ * uses the combined SPEC_Any table. The same token, integerToken(0),

+ * is passed with every registration; the factories ignore it.

+ */

+void UnescapeTransliterator::registerIDs() {

+ Token t = integerToken(0);

+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Unicode"), _createUnicode, t);

+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Java"), _createJava, t);

+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/C"), _createC, t);

+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/XML"), _createXML, t);

+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/XML10"), _createXML10, t);

+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Perl"), _createPerl, t);

+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any"), _createAny, t);

+/**

+ * Constructor. Takes the encoded spec array.

+ * The array is duplicated with copySpec(), so this object holds its own

+ * copy rather than pointing at newSpec. If that allocation fails, spec

+ * is left NULL. newSpec must be terminated by END.

+ * NOTE(review): NULL is passed as the second base-class argument;

+ * presumably this means "no filter" - confirm against Transliterator.

+ */

+UnescapeTransliterator::UnescapeTransliterator(const UnicodeString& newID,

+ const UChar *newSpec) :

+ Transliterator(newID, NULL)

+ this->spec = copySpec(newSpec);

+/**

+ * Copy constructor.

+ * Copy-constructs the Transliterator base from o and gives this object

+ * its own duplicate of o.spec, made with copySpec().

+ * NOTE(review): copySpec() reads through its argument without a NULL

+ * check, and o.spec can be NULL if an earlier allocation failed.

+ */

+UnescapeTransliterator::UnescapeTransliterator(const UnescapeTransliterator& o) :

+ Transliterator(o) {

+ this->spec = copySpec(o.spec);

+UnescapeTransliterator::~UnescapeTransliterator() { // Destructor: releases this object's copy of the spec array.

+ uprv_free(spec); // spec was allocated with uprv_malloc() inside copySpec().

+/**

+ * Transliterator API.

+ * Returns a newly allocated copy of this object, made with the copy

+ * constructor.

+ */

+Transliterator* UnescapeTransliterator::clone() const {

+ return new UnescapeTransliterator(*this);

+/**

+ * Implements {@link Transliterator#handleTransliterate}.

+ *

+ * Scans text from pos.start towards pos.limit. At each position the

+ * forms encoded in spec[] are tried in order: first the form's prefix

+ * chars, then between minDigits and maxDigits digits in the form's

+ * radix, then the form's suffix chars. On a match the matched span is

+ * replaced by the parsed value and limit is adjusted for the change in

+ * length. After the forms have been tried, start moves forward by one

+ * code point.

+ *

+ * When isIncremental is true and the input runs out in the middle of a

+ * possible match, the scan stops (goto exit) with start still at the

+ * beginning of that partial match.

+ *

+ * On exit pos.start, pos.limit and pos.contextLimit are written back.

+ *

+ * NOTE(review): the parsed value u is not range-checked before it is

+ * turned into a UnicodeString; e.g. a form with radix 10 and maxDigits 7

+ * can yield values above 0x10FFFF. Confirm how UnicodeString(UChar32)

+ * treats such values.

+ */

+void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos,

+ UBool isIncremental) const {

+ int32_t start = pos.start;

+ int32_t limit = pos.limit;

+ int32_t i, j, ipat;

+ while (start < limit) {

+ // Loop over the forms in spec[]. Exit this loop when we

+ // match one of the specs. Exit the outer loop if a

+ // partial match is detected and isIncremental is true.

+ for (j=0, ipat=0; spec[ipat] != END; ++j) { // ipat indexes into spec[]; j only counts forms and is not read elsewhere in this function.

+ // Read the header

+ int32_t prefixLen = spec[ipat++];

+ int32_t suffixLen = spec[ipat++];

+ int8_t radix = (int8_t) spec[ipat++];

+ int32_t minDigits = spec[ipat++];

+ int32_t maxDigits = spec[ipat++]; // ipat now points at the first prefix char of this form.

+ // s is a copy of start that is advanced over the

+ // characters as we parse them.

+ int32_t s = start;

+ UBool match = TRUE;

+ for (i=0; i<prefixLen; ++i) {

+ if (s >= limit) {

+ if (i > 0) {

+ // We've already matched a character. This is

+ // a partial match, so we return if in

+ // incremental mode. In non-incremental mode,

+ // go to the next spec.

+ if (isIncremental) {

+ goto exit;

+ }

+ match = FALSE;

+ break;

+ }

+ UChar c = text.charAt(s++);

+ if (c != spec[ipat + i]) { // Compare against the i-th prefix char of this form.

+ match = FALSE;

+ break;

+ }

+ if (match) {

+ UChar32 u = 0;

+ int32_t digitCount = 0;

+ for (;;) {

+ if (s >= limit) {

+ // Check for partial match in incremental mode.

+ if (s > start && isIncremental) {

+ goto exit;

+ }

+ break;

+ }

+ UChar32 ch = text.char32At(s);

+ int32_t digit = u_digit(ch, radix);

+ if (digit < 0) { // ch is not a digit in this radix: the digit run ends here.

+ break;

+ }

+ s += UTF_CHAR_LENGTH(ch);

+ u = (u * radix) + digit; // Accumulate the value, most significant digit first.

+ if (++digitCount == maxDigits) {

+ break;

+ }

+ match = (digitCount >= minDigits); // Fewer than minDigits digits means this form does not match.

+ if (match) {

+ for (i=0; i<suffixLen; ++i) {

+ if (s >= limit) {

+ // Check for partial match in incremental mode.

+ if (s > start && isIncremental) {

+ goto exit;

+ }

+ match = FALSE;

+ break;

+ }

+ UChar c = text.charAt(s++);

+ if (c != spec[ipat + prefixLen + i]) { // The suffix chars are stored right after the prefix chars.

+ match = FALSE;

+ break;

+ }

+ if (match) {

+ // At this point, we have a match

+ UnicodeString str(u);

+ text.handleReplaceBetween(start, s, str);

+ limit -= s - start - str.length(); // Shrink limit by (matched length - replacement length).

+ // The following break statement leaves the

+ // loop that is traversing the forms in

+ // spec[]. We then parse the next input

+ // character.

+ break;

+ }

+ ipat += prefixLen + suffixLen; // Skip this form's prefix and suffix chars to reach the next header (or END).

+ }

+ if (start < limit) {

+ start += UTF_CHAR_LENGTH(text.char32At(start));

+ }

+ exit:

+ pos.contextLimit += limit - pos.limit; // Shift contextLimit by the same amount that limit changed.

+ pos.limit = limit;

+ pos.start = start;

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_TRANSLITERATION */

+//eof

Property changes on: icu46/source/i18n/unesctrn.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/i18n/unesctrn.h ('k') | icu46/source/i18n/uni2name.h » ('j') | no next file with comments »