icu46/source/i18n/name2uni.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/name2uni.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 2001-2008, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * Date Name Description

	7 * 06/07/01 aliu Creation.

	8 **********************************************************************

	9 */

	10

	11 #include "unicode/utypes.h"

	12

	13 #if !UCONFIG_NO_TRANSLITERATION

	14

	15 #include "unicode/unifilt.h"

	16 #include "unicode/uchar.h"

	17 #include "unicode/uniset.h"

	18 #include "name2uni.h"

	19 #include "cmemory.h"

	20 #include "uprops.h"

	21 #include "uinvchar.h"

	22 #include "util.h"

	23

	24 U_NAMESPACE_BEGIN

	25

	26 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NameUnicodeTransliterator)

	27

	28 static const UChar OPEN[] = {92,78,126,123,126,0}; // "\N~{~"

	29 static const UChar OPEN_DELIM = 92; // '\\' first char of OPEN

	30 static const UChar CLOSE_DELIM = 125; // '}'

	31 static const UChar SPACE = 32; // ' '

	32

	33 U_CDECL_BEGIN

	34

	35 // USetAdder implementation

	36 // Does not use uset.h to reduce code dependencies

	37 static void U_CALLCONV

	38 _set_add(USet *set, UChar32 c) {

	39 uset_add(set, c);

	40 }

	41

	42 // These functions aren't used.

	43 /*static void U_CALLCONV

	44 _set_addRange(USet *set, UChar32 start, UChar32 end) {

	45 ((UnicodeSet *)set)->add(start, end);

	46 }

	47

	48 static void U_CALLCONV

	49 _set_addString(USet *set, const UChar *str, int32_t length) {

	50 ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));

	51 }*/

	52

	53 U_CDECL_END

	54

	55 /**

	56 * Constructs a transliterator with the default delimiters '{' and

	57 * '}'.

	58 */

	59 NameUnicodeTransliterator::NameUnicodeTransliterator(UnicodeFilter* adoptedFilte r) :

	60 Transliterator(UNICODE_STRING("Name-Any", 8), adoptedFilter) {

	61

	62 UnicodeSet *legalPtr = &legal;

	63 // Get the legal character set

	64 USetAdder sa = {

	65 (USet )legalPtr, // USet == UnicodeSet*

	66 _set_add,

	67 NULL, // Don't need _set_addRange

	68 NULL, // Don't need _set_addString

	69 NULL, // Don't need remove()

	70 NULL

	71 };

	72 uprv_getCharNameCharacters(&sa);

	73 }

	74

	75 /**

	76 * Destructor.

	77 */

	78 NameUnicodeTransliterator::~NameUnicodeTransliterator() {}

	79

	80 /**

	81 * Copy constructor.

	82 */

	83 NameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliter ator& o) :

	84 Transliterator(o), legal(o.legal) {}

	85

	86 /**

	87 * Assignment operator.

	88 */

	89 /*NameUnicodeTransliterator& NameUnicodeTransliterator::operator=(

	90 const NameUnicodeTransliterator& o) {

	91 Transliterator::operator=(o);

	92 // not necessary: the legal sets should all be the same -- legal=o.legal;

	93 return *this;

	94 }*/

	95

	96 /**

	97 * Transliterator API.

	98 */

	99 Transliterator* NameUnicodeTransliterator::clone(void) const {

	100 return new NameUnicodeTransliterator(*this);

	101 }

	102

	103 /**

	104 * Implements {@link Transliterator#handleTransliterate}.

	105 */

	106 void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos ition& offsets,

	107 UBool isIncremental) const {

	108 // The failure mode, here and below, is to behave like Any-Null,

	109 // if either there is no name data (max len == 0) or there is no

	110 // memory (malloc() => NULL).

	111

	112 int32_t maxLen = uprv_getMaxCharNameLength();

	113 if (maxLen == 0) {

	114 offsets.start = offsets.limit;

	115 return;

	116 }

	117

	118 // Accomodate the longest possible name

	119 ++maxLen; // allow for temporary trailing space

	120 char* cbuf = (char*) uprv_malloc(maxLen);

	121 if (cbuf == NULL) {

	122 offsets.start = offsets.limit;

	123 return;

	124 }

	125

	126 UnicodeString openPat(TRUE, OPEN, -1);

	127 UnicodeString str, name;

	128

	129 int32_t cursor = offsets.start;

	130 int32_t limit = offsets.limit;

	131

	132 // Modes:

	133 // 0 - looking for open delimiter

	134 // 1 - after open delimiter

	135 int32_t mode = 0;

	136 int32_t openPos = -1; // open delim candidate pos

	137

	138 UChar32 c;

	139 while (cursor < limit) {

	140 c = text.char32At(cursor);

	141

	142 switch (mode) {

	143 case 0: // looking for open delimiter

	144 if (c == OPEN_DELIM) { // quick check first

	145 openPos = cursor;

	146 int32_t i =

	147 ICU_Utility::parsePattern(openPat, text, cursor, limit);

	148 if (i >= 0 && i < limit) {

	149 mode = 1;

	150 name.truncate(0);

	151 cursor = i;

	152 continue; // *** reprocess char32At(cursor)

	153 }

	154 }

	155 break;

	156

	157 case 1: // after open delimiter

	158 // Look for legal chars. If \s+ is found, convert it

	159 // to a single space. If closeDelimiter is found, exit

	160 // the loop. If any other character is found, exit the

	161 // loop. If the limit is reached, exit the loop.

	162

	163 // Convert \s+ => SPACE. This assumes there are no

	164 // runs of >1 space characters in names.

	165 if (uprv_isRuleWhiteSpace(c)) {

	166 // Ignore leading whitespace

	167 if (name.length() > 0 &&

	168 name.charAt(name.length()-1) != SPACE) {

	169 name.append(SPACE);

	170 // If we are too long then abort. maxLen includes

	171 // temporary trailing space, so use '>'.

	172 if (name.length() > maxLen) {

	173 mode = 0;

	174 }

	175 }

	176 break;

	177 }

	178

	179 if (c == CLOSE_DELIM) {

	180 int32_t len = name.length();

	181

	182 // Delete trailing space, if any

	183 if (len > 0 &&

	184 name.charAt(len-1) == SPACE) {

	185 --len;

	186 }

	187

	188 if (uprv_isInvariantUString(name.getBuffer(), len)) {

	189 name.extract(0, len, cbuf, maxLen, US_INV);

	190

	191 UErrorCode status = U_ZERO_ERROR;

	192 c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status);

	193 if (U_SUCCESS(status)) {

	194 // Lookup succeeded

	195

	196 // assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1);

	197 cursor++; // advance over CLOSE_DELIM

	198

	199 str.truncate(0);

	200 str.append(c);

	201 text.handleReplaceBetween(openPos, cursor, str);

	202

	203 // Adjust indices for the change in the length of

	204 // the string. Do not assume that str.length() ==

	205 // 1, in case of surrogates.

	206 int32_t delta = cursor - openPos - str.length();

	207 cursor -= delta;

	208 limit -= delta;

	209 // assert(cursor == openPos + str.length());

	210 }

	211 }

	212 // If the lookup failed, we leave things as-is and

	213 // still switch to mode 0 and continue.

	214 mode = 0;

	215 openPos = -1; // close off candidate

	216 continue; // *** reprocess char32At(cursor)

	217 }

	218

	219 // Check if c is a legal char. We assume here that

	220 // legal.contains(OPEN_DELIM) is FALSE, so when we abort a

	221 // name, we don't have to go back to openPos+1.

	222 if (legal.contains(c)) {

	223 name.append(c);

	224 // If we go past the longest possible name then abort.

	225 // maxLen includes temporary trailing space, so use '>='.

	226 if (name.length() >= maxLen) {

	227 mode = 0;

	228 }

	229 }

	230

	231 // Invalid character

	232 else {

	233 --cursor; // Backup and reprocess this character

	234 mode = 0;

	235 }

	236

	237 break;

	238 }

	239

	240 cursor += UTF_CHAR_LENGTH(c);

	241 }

	242

	243 offsets.contextLimit += limit - offsets.limit;

	244 offsets.limit = limit;

	245 // In incremental mode, only advance the cursor up to the last

	246 // open delimiter candidate.

	247 offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;

	248

	249 uprv_free(cbuf);

	250 }

	251

	252 U_NAMESPACE_END

	253

	254 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

OLD	NEW

« no previous file with comments | « icu46/source/i18n/name2uni.h ('k') | icu46/source/i18n/nfrlist.h » ('j') | no next file with comments »