icu46/source/i18n/unesctrn.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/unesctrn.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (c) 2001-2008, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * Date Name Description

	7 * 11/19/2001 aliu Creation.

	8 **********************************************************************

	9 */

	10

	11 #include "unicode/utypes.h"

	12

	13 #if !UCONFIG_NO_TRANSLITERATION

	14

	15 #include "unicode/uchar.h"

	16 #include "unesctrn.h"

	17 #include "util.h"

	18

	19 #include "cmemory.h"

	20

	21 U_NAMESPACE_BEGIN

	22

	23 /**

	24 * Special character marking the end of the spec[] array.

	25 */

	26 static const UChar END = 0xFFFF;

	27

	28 // Unicode: "U+10FFFF" hex, min=4, max=6

	29 static const UChar SPEC_Unicode[] = {

	30 2, 0, 16, 4, 6, 85/U/, 43/+/,

	31 END

	32 };

	33

	34 // Java: "\\uFFFF" hex, min=4, max=4

	35 static const UChar SPEC_Java[] = {

	36 2, 0, 16, 4, 4, 92/\/, 117/u/,

	37 END

	38 };

	39

	40 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8

	41 static const UChar SPEC_C[] = {

	42 2, 0, 16, 4, 4, 92/\/, 117/u/,

	43 2, 0, 16, 8, 8, 92/\/, 85/U/,

	44 END

	45 };

	46

	47 // XML: "􏿿" hex, min=1, max=6

	48 static const UChar SPEC_XML[] = {

	49 3, 1, 16, 1, 6, 38/&/, 35/#/, 120/x/, 59/;/,

	50 END

	51 };

	52

	53 // XML10: "􏿿" dec, min=1, max=7 (not really "Hex-Any")

	54 static const UChar SPEC_XML10[] = {

	55 2, 1, 10, 1, 7, 38/&/, 35/#/, 59/;/,

	56 END

	57 };

	58

	59 // Perl: "\\x{263A}" hex, min=1, max=6

	60 static const UChar SPEC_Perl[] = {

	61 3, 1, 16, 1, 6, 92/\/, 120/x/, 123/{/, 125/}/,

	62 END

	63 };

	64

	65 // All: Java, C, Perl, XML, XML10, Unicode

	66 static const UChar SPEC_Any[] = {

	67 2, 0, 16, 4, 6, 85/U/, 43/+/, // Unicode

	68 2, 0, 16, 4, 4, 92/\/, 117/u/, // Java

	69 2, 0, 16, 8, 8, 92/\/, 85/U/, // C (surrogates)

	70 3, 1, 16, 1, 6, 38/&/, 35/#/, 120/x/, 59/;/, // XML

	71 2, 1, 10, 1, 7, 38/&/, 35/#/, 59/;/, // XML10

	72 3, 1, 16, 1, 6, 92/\/, 120/x/, 123/{/, 125/}/, // Perl

	73 END

	74 };

	75

	76 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnescapeTransliterator)

	77

	78 static UChar* copySpec(const UChar* spec) {

	79 int32_t len = 0;

	80 while (spec[len] != END) {

	81 ++len;

	82 }

	83 ++len;

	84 UChar result = (UChar )uprv_malloc(len*sizeof(UChar));

	85 // Check for memory allocation error.

	86 if (result != NULL) {

	87 uprv_memcpy(result, spec, len*sizeof(result[0]));

	88 }

	89 return result;

	90 }

	91

	92 /**

	93 * Factory methods. Ignore the context.

	94 */

	95 static Transliterator* _createUnicode(const UnicodeString& ID, Transliterator::T oken /context/) {

	96 return new UnescapeTransliterator(ID, SPEC_Unicode);

	97 }

	98 static Transliterator* _createJava(const UnicodeString& ID, Transliterator::Toke n /context/) {

	99 return new UnescapeTransliterator(ID, SPEC_Java);

	100 }

	101 static Transliterator* _createC(const UnicodeString& ID, Transliterator::Token / context/) {

	102 return new UnescapeTransliterator(ID, SPEC_C);

	103 }

	104 static Transliterator* _createXML(const UnicodeString& ID, Transliterator::Token /context/) {

	105 return new UnescapeTransliterator(ID, SPEC_XML);

	106 }

	107 static Transliterator* _createXML10(const UnicodeString& ID, Transliterator::Tok en /context/) {

	108 return new UnescapeTransliterator(ID, SPEC_XML10);

	109 }

	110 static Transliterator* _createPerl(const UnicodeString& ID, Transliterator::Toke n /context/) {

	111 return new UnescapeTransliterator(ID, SPEC_Perl);

	112 }

	113 static Transliterator* _createAny(const UnicodeString& ID, Transliterator::Token /context/) {

	114 return new UnescapeTransliterator(ID, SPEC_Any);

	115 }

	116

	117 /**

	118 * Registers standard variants with the system. Called by

	119 * Transliterator during initialization.

	120 */

	121 void UnescapeTransliterator::registerIDs() {

	122 Token t = integerToken(0);

	123

	124 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Unicode"), _ createUnicode, t);

	125

	126 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Java"), _cre ateJava, t);

	127

	128 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/C"), _create C, t);

	129

	130 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/XML"), _crea teXML, t);

	131

	132 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/XML10"), _cr eateXML10, t);

	133

	134 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any/Perl"), _cre atePerl, t);

	135

	136 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Hex-Any"), _createAn y, t);

	137 }

	138

	139 /**

	140 * Constructor. Takes the encoded spec array.

	141 */

	142 UnescapeTransliterator::UnescapeTransliterator(const UnicodeString& newID,

	143 const UChar *newSpec) :

	144 Transliterator(newID, NULL)

	145 {

	146 this->spec = copySpec(newSpec);

	147 }

	148

	149 /**

	150 * Copy constructor.

	151 */

	152 UnescapeTransliterator::UnescapeTransliterator(const UnescapeTransliterator& o) :

	153 Transliterator(o) {

	154 this->spec = copySpec(o.spec);

	155 }

	156

	157 UnescapeTransliterator::~UnescapeTransliterator() {

	158 uprv_free(spec);

	159 }

	160

	161 /**

	162 * Transliterator API.

	163 */

	164 Transliterator* UnescapeTransliterator::clone() const {

	165 return new UnescapeTransliterator(*this);

	166 }

	167

	168 /**

	169 * Implements {@link Transliterator#handleTransliterate}.

	170 */

	171 void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPositi on& pos,

	172 UBool isIncremental) const {

	173 int32_t start = pos.start;

	174 int32_t limit = pos.limit;

	175 int32_t i, j, ipat;

	176

	177 while (start < limit) {

	178 // Loop over the forms in spec[]. Exit this loop when we

	179 // match one of the specs. Exit the outer loop if a

	180 // partial match is detected and isIncremental is true.

	181 for (j=0, ipat=0; spec[ipat] != END; ++j) {

	182

	183 // Read the header

	184 int32_t prefixLen = spec[ipat++];

	185 int32_t suffixLen = spec[ipat++];

	186 int8_t radix = (int8_t) spec[ipat++];

	187 int32_t minDigits = spec[ipat++];

	188 int32_t maxDigits = spec[ipat++];

	189

	190 // s is a copy of start that is advanced over the

	191 // characters as we parse them.

	192 int32_t s = start;

	193 UBool match = TRUE;

	194

	195 for (i=0; i<prefixLen; ++i) {

	196 if (s >= limit) {

	197 if (i > 0) {

	198 // We've already matched a character. This is

	199 // a partial match, so we return if in

	200 // incremental mode. In non-incremental mode,

	201 // go to the next spec.

	202 if (isIncremental) {

	203 goto exit;

	204 }

	205 match = FALSE;

	206 break;

	207 }

	208 }

	209 UChar c = text.charAt(s++);

	210 if (c != spec[ipat + i]) {

	211 match = FALSE;

	212 break;

	213 }

	214 }

	215

	216 if (match) {

	217 UChar32 u = 0;

	218 int32_t digitCount = 0;

	219 for (;;) {

	220 if (s >= limit) {

	221 // Check for partial match in incremental mode.

	222 if (s > start && isIncremental) {

	223 goto exit;

	224 }

	225 break;

	226 }

	227 UChar32 ch = text.char32At(s);

	228 int32_t digit = u_digit(ch, radix);

	229 if (digit < 0) {

	230 break;

	231 }

	232 s += UTF_CHAR_LENGTH(ch);

	233 u = (u * radix) + digit;

	234 if (++digitCount == maxDigits) {

	235 break;

	236 }

	237 }

	238

	239 match = (digitCount >= minDigits);

	240

	241 if (match) {

	242 for (i=0; i<suffixLen; ++i) {

	243 if (s >= limit) {

	244 // Check for partial match in incremental mode.

	245 if (s > start && isIncremental) {

	246 goto exit;

	247 }

	248 match = FALSE;

	249 break;

	250 }

	251 UChar c = text.charAt(s++);

	252 if (c != spec[ipat + prefixLen + i]) {

	253 match = FALSE;

	254 break;

	255 }

	256 }

	257

	258 if (match) {

	259 // At this point, we have a match

	260 UnicodeString str(u);

	261 text.handleReplaceBetween(start, s, str);

	262 limit -= s - start - str.length();

	263 // The following break statement leaves the

	264 // loop that is traversing the forms in

	265 // spec[]. We then parse the next input

	266 // character.

	267 break;

	268 }

	269 }

	270 }

	271

	272 ipat += prefixLen + suffixLen;

	273 }

	274

	275 if (start < limit) {

	276 start += UTF_CHAR_LENGTH(text.char32At(start));

	277 }

	278 }

	279

	280 exit:

	281 pos.contextLimit += limit - pos.limit;

	282 pos.limit = limit;

	283 pos.start = start;

	284 }

	285

	286 U_NAMESPACE_END

	287

	288 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

	289

	290 //eof

OLD	NEW

« no previous file with comments | « icu46/source/i18n/unesctrn.h ('k') | icu46/source/i18n/uni2name.h » ('j') | no next file with comments »