icu46/source/common/unames.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/unames.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 ******************************************************************************

	3 *

	4 * Copyright (C) 1999-2009, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 ******************************************************************************

	8 * file name: unames.c

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created on: 1999oct04

	14 * created by: Markus W. Scherer

	15 */

	16

	17 #include "unicode/utypes.h"

	18 #include "unicode/putil.h"

	19 #include "unicode/uchar.h"

	20 #include "unicode/udata.h"

	21 #include "ustr_imp.h"

	22 #include "umutex.h"

	23 #include "cmemory.h"

	24 #include "cstring.h"

	25 #include "ucln_cmn.h"

	26 #include "udataswp.h"

	27 #include "uprops.h"

	28

	29 /* prototypes ------------------------------------------------------------- */

	30

	/* Number of elements of a true array (do not use on a pointer). */
	#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

	/* Item name and type that isDataLoaded() passes to udata_openChoice(). */
	static const char DATA_NAME[] = "unames";
	static const char DATA_TYPE[] = "icu";

	/*
	 * Names are stored in groups of 32 consecutive code points:
	 * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line in it.
	 */
	#define GROUP_SHIFT 5
	#define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
	#define GROUP_MASK (LINES_PER_GROUP-1)

	39

	40 /*

	41 * This struct was replaced by explicitly accessing equivalent

	42 * fields from triples of uint16_t.

	43 * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,

	44 * which broke the assumption that sizeof(Group)==6 and that the ++ operator

	45 * would advance by 6 bytes (3 uint16_t).

	46 *

	47 * We can't just change the data structure because it's loaded from a data file,

	48 * and we don't want to make it less compact, so we changed the access code.

	49 *

	50 * For details see ICU tickets 6331 and 6008.

	51 typedef struct {

	52 uint16_t groupMSB,

	53 offsetHigh, offsetLow; / * avoid padding * /

	54 } Group;

	55 */

	/* Indexes of the three uint16_t fields that form one group record. */
	enum {
	    GROUP_MSB,
	    GROUP_OFFSET_HIGH,
	    GROUP_OFFSET_LOW,
	    GROUP_LENGTH
	};

	/*
	 * Get the 32-bit group offset.
	 * The two halves are combined as unsigned values and only then converted,
	 * so that a high half >=0x8000 is never left-shifted as a signed integer.
	 * @param group (const uint16_t *) pointer to a Group triple of uint16_t
	 * @return group offset (int32_t)
	 */
	#define GET_GROUP_OFFSET(group) ((int32_t)(((uint32_t)(group)[GROUP_OFFSET_HIGH]<<16)|(group)[GROUP_OFFSET_LOW]))

	/* Step forward/backward by one group record (GROUP_LENGTH uint16_t). */
	#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
	#define PREV_GROUP(group) ((group)-GROUP_LENGTH)

	72

	/*
	 * Header of one range of algorithmically named code points.
	 * The bytes that follow the header depend on type (see getAlgName()):
	 * type 0: a NUL-terminated prefix; variant is the number of hex digits
	 * type 1: variant uint16_t factors, then a NUL-terminated prefix,
	 *         then the NUL-terminated element strings
	 */
	typedef struct {
	    uint32_t start, end;    /* first and last code point, both inclusive */
	    uint8_t type, variant;
	    uint16_t size;          /* presumably the byte size of the whole range record - TODO confirm */
	} AlgorithmicRange;

	/*
	 * Index at the start of the loaded data.
	 * tokenStringOffset, groupsOffset and groupStringOffset are byte offsets
	 * from the start of this struct; algNamesOffset is presumably the same
	 * kind of offset (its use is not visible here - TODO confirm).
	 */
	typedef struct {
	    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
	} UCharNames;

	82

	/*
	 * Get the groups table from a UCharNames struct.
	 * The groups table consists of one uint16_t groupCount followed by
	 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH.
	 * The table is located groupsOffset bytes after the start of the struct.
	 *
	 * @param names (const UCharNames *) pointer to the UCharNames indexes
	 * @return (const uint16_t *) pointer to the groups table
	 */
	#define GET_GROUPS(names) ((const uint16_t *)((const char *)(names)+(names)->groupsOffset))

	93

	94 typedef struct {

	95 const char *otherName;

	96 UChar32 code;

	97 } FindName;

	98

	99 #define DO_FIND_NAME NULL

	100

	101 static UDataMemory *uCharNamesData=NULL;

	102 static UCharNames *uCharNames=NULL;

	103 static UErrorCode gLoadErrorCode=U_ZERO_ERROR;

	104

	105 /*

	106 * Maximum length of character names (regular & 1.0).

	107 */

	108 static int32_t gMaxNameLength=0;

	109

	110 /*

	111 * Set of chars used in character names (regular & 1.0).

	112 * Chars are platform-dependent (can be EBCDIC).

	113 */

	114 static uint32_t gNameSet[8]={ 0 };

	115

	116 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT

	117 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1

	118 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2

	119

	120 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)

	121

	122 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {

	123 "unassigned",

	124 "uppercase letter",

	125 "lowercase letter",

	126 "titlecase letter",

	127 "modifier letter",

	128 "other letter",

	129 "non spacing mark",

	130 "enclosing mark",

	131 "combining spacing mark",

	132 "decimal digit number",

	133 "letter number",

	134 "other number",

	135 "space separator",

	136 "line separator",

	137 "paragraph separator",

	138 "control",

	139 "format",

	140 "private use area",

	141 "surrogate",

	142 "dash punctuation",

	143 "start punctuation",

	144 "end punctuation",

	145 "connector punctuation",

	146 "other punctuation",

	147 "math symbol",

	148 "currency symbol",

	149 "modifier symbol",

	150 "other symbol",

	151 "initial punctuation",

	152 "final punctuation",

	153 "noncharacter",

	154 "lead surrogate",

	155 "trail surrogate"

	156 };

	157

	158 /* implementation ----------------------------------------------------------- */

	159

	160 static UBool U_CALLCONV unames_cleanup(void)

	161 {

	162 if(uCharNamesData) {

	163 udata_close(uCharNamesData);

	164 uCharNamesData = NULL;

	165 }

	166 if(uCharNames) {

	167 uCharNames = NULL;

	168 }

	169 gMaxNameLength=0;

	170 return TRUE;

	171 }

	172

	173 static UBool U_CALLCONV

	174 isAcceptable(void *context,

	175 const char type, const char name,

	176 const UDataInfo *pInfo) {

	177 return (UBool)(

	178 pInfo->size>=20 &&

	179 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&

	180 pInfo->charsetFamily==U_CHARSET_FAMILY &&

	181 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */

	182 pInfo->dataFormat[1]==0x6e &&

	183 pInfo->dataFormat[2]==0x61 &&

	184 pInfo->dataFormat[3]==0x6d &&

	185 pInfo->formatVersion[0]==1);

	186 }

	187

	188 static UBool

	189 isDataLoaded(UErrorCode *pErrorCode) {

	190 /* load UCharNames from file if necessary */

	191 UBool isCached;

	192

	193 /* do this because double-checked locking is broken */

	194 UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);

	195

	196 if(!isCached) {

	197 UCharNames *names;

	198 UDataMemory *data;

	199

	200 /* check error code from previous attempt */

	201 if(U_FAILURE(gLoadErrorCode)) {

	202 *pErrorCode=gLoadErrorCode;

	203 return FALSE;

	204 }

	205

	206 /* open the data outside the mutex block */

	207 data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pE rrorCode);

	208 if(U_FAILURE(*pErrorCode)) {

	209 gLoadErrorCode=*pErrorCode;

	210 return FALSE;

	211 }

	212

	213 names=(UCharNames *)udata_getMemory(data);

	214

	215 /* in the mutex block, set the data for this process */

	216 {

	217 umtx_lock(NULL);

	218 if(uCharNames==NULL) {

	219 uCharNamesData=data;

	220 uCharNames=names;

	221 data=NULL;

	222 names=NULL;

	223 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);

	224 }

	225 umtx_unlock(NULL);

	226 }

	227

	228 /* if a different thread set it first, then close the extra data */

	229 if(data!=NULL) {

	230 udata_close(data); /* NULL if it was set correctly */

	231 }

	232 }

	233 return TRUE;

	234 }

	235

	/*
	 * Append c to an output buffer that may be too short.
	 * The character is stored (and buffer/bufferLength are advanced) only
	 * while capacity remains; bufferPos is always incremented so that it
	 * ends up as the full length of the untruncated output.
	 * buffer, bufferLength and bufferPos must be modifiable lvalues.
	 */
	#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
	    if((bufferLength)>0) { \
	        *(buffer)++=(c); \
	        --(bufferLength); \
	    } \
	    ++(bufferPos); \
	}

	/* Internal name choice value that follows the public choices (ISO comment field). */
	#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT

	245

	246 /*

	247 * Important: expandName() and compareName() are almost the same -

	248 * apply fixes to both.

	249 *

	250 * UnicodeData.txt uses ';' as a field separator, so no

	251 * field can contain ';' as part of its contents.

	252 * In unames.dat, it is marked as token[';']==-1 only if the

	253 * semicolon is used in the data file - which is iff we

	254 * have Unicode 1.0 names or ISO comments or aliases.

	255 * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases

	256 * although we know that it will never be part of a name.

	257 */

	258 static uint16_t

	259 expandName(UCharNames *names,

	260 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,

	261 char *buffer, uint16_t bufferLength) {

	262 uint16_t tokens=(uint16_t )names+8;

	263 uint16_t token, tokenCount=*tokens++, bufferPos=0;

	264 uint8_t tokenStrings=(uint8_t )names+names->tokenStringOffset;

	265 uint8_t c;

	266

	267 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

	268 /*

	269 * skip the modern name if it is not requested _and_

	270 * if the semicolon byte value is a character, not a token number

	271 */

	272 if((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(uint16_t)(-1)) {

	273 int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;

	274 do {

	275 while(nameLength>0) {

	276 --nameLength;

	277 if(*name++==';') {

	278 break;

	279 }

	280 }

	281 } while(--fieldIndex>0);

	282 } else {

	283 /*

	284 * the semicolon byte value is a token number, therefore

	285 * only modern names are stored in unames.dat and there is no

	286 * such requested alternate name here

	287 */

	288 nameLength=0;

	289 }

	290 }

	291

	292 /* write each letter directly, and write a token word per token */

	293 while(nameLength>0) {

	294 --nameLength;

	295 c=*name++;

	296

	297 if(c>=tokenCount) {

	298 if(c!=';') {

	299 /* implicit letter */

	300 WRITE_CHAR(buffer, bufferLength, bufferPos, c);

	301 } else {

	302 /* finished */

	303 break;

	304 }

	305 } else {

	306 token=tokens[c];

	307 if(token==(uint16_t)(-2)) {

	308 /* this is a lead byte for a double-byte token */

	309 token=tokens[c<<8\|*name++];

	310 --nameLength;

	311 }

	312 if(token==(uint16_t)(-1)) {

	313 if(c!=';') {

	314 /* explicit letter */

	315 WRITE_CHAR(buffer, bufferLength, bufferPos, c);

	316 } else {

	317 /* stop, but skip the semicolon if we are seeking

	318 extended names and there was no 2.0 name but there

	319 is a 1.0 name. */

	320 if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {

	321 if ((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(u int16_t)(-1)) {

	322 continue;

	323 }

	324 }

	325 /* finished */

	326 break;

	327 }

	328 } else {

	329 /* write token word */

	330 uint8_t *tokenString=tokenStrings+token;

	331 while((c=*tokenString++)!=0) {

	332 WRITE_CHAR(buffer, bufferLength, bufferPos, c);

	333 }

	334 }

	335 }

	336 }

	337

	338 /* zero-terminate */

	339 if(bufferLength>0) {

	340 *buffer=0;

	341 }

	342

	343 return bufferPos;

	344 }

	345

	346 /*

	347 * compareName() is almost the same as expandName() except that it compares

	348 * the currently expanded name to an input name.

	349 * It returns the match/no match result as soon as possible.

	350 */

	351 static UBool

	352 compareName(UCharNames *names,

	353 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice ,

	354 const char *otherName) {

	355 uint16_t tokens=(uint16_t )names+8;

	356 uint16_t token, tokenCount=*tokens++;

	357 uint8_t tokenStrings=(uint8_t )names+names->tokenStringOffset;

	358 uint8_t c;

	359 const char *origOtherName = otherName;

	360

	361 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

	362 /*

	363 * skip the modern name if it is not requested _and_

	364 * if the semicolon byte value is a character, not a token number

	365 */

	366 if((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(uint16_t)(-1)) {

	367 int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;

	368 do {

	369 while(nameLength>0) {

	370 --nameLength;

	371 if(*name++==';') {

	372 break;

	373 }

	374 }

	375 } while(--fieldIndex>0);

	376 } else {

	377 /*

	378 * the semicolon byte value is a token number, therefore

	379 * only modern names are stored in unames.dat and there is no

	380 * such requested alternate name here

	381 */

	382 nameLength=0;

	383 }

	384 }

	385

	386 /* compare each letter directly, and compare a token word per token */

	387 while(nameLength>0) {

	388 --nameLength;

	389 c=*name++;

	390

	391 if(c>=tokenCount) {

	392 if(c!=';') {

	393 /* implicit letter */

	394 if((char)c!=*otherName++) {

	395 return FALSE;

	396 }

	397 } else {

	398 /* finished */

	399 break;

	400 }

	401 } else {

	402 token=tokens[c];

	403 if(token==(uint16_t)(-2)) {

	404 /* this is a lead byte for a double-byte token */

	405 token=tokens[c<<8\|*name++];

	406 --nameLength;

	407 }

	408 if(token==(uint16_t)(-1)) {

	409 if(c!=';') {

	410 /* explicit letter */

	411 if((char)c!=*otherName++) {

	412 return FALSE;

	413 }

	414 } else {

	415 /* stop, but skip the semicolon if we are seeking

	416 extended names and there was no 2.0 name but there

	417 is a 1.0 name. */

	418 if(otherName == origOtherName && nameChoice == U_EXTENDED_CH AR_NAME) {

	419 if ((uint8_t)';'>=tokenCount \|\| tokens[(uint8_t)';']==(u int16_t)(-1)) {

	420 continue;

	421 }

	422 }

	423 /* finished */

	424 break;

	425 }

	426 } else {

	427 /* write token word */

	428 uint8_t *tokenString=tokenStrings+token;

	429 while((c=*tokenString++)!=0) {

	430 if((char)c!=*otherName++) {

	431 return FALSE;

	432 }

	433 }

	434 }

	435 }

	436 }

	437

	438 /* complete match? */

	439 return (UBool)(*otherName==0);

	440 }

	441

	442 static uint8_t getCharCat(UChar32 cp) {

	443 uint8_t cat;

	444

	445 if (UTF_IS_UNICODE_NONCHAR(cp)) {

	446 return U_NONCHARACTER_CODE_POINT;

	447 }

	448

	449 if ((cat = u_charType(cp)) == U_SURROGATE) {

	450 cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;

	451 }

	452

	453 return cat;

	454 }

	455

	456 static const char *getCharCatName(UChar32 cp) {

	457 uint8_t cat = getCharCat(cp);

	458

	459 /* Return unknown if the table of names above is not up to

	460 date. */

	461

	462 if (cat >= LENGTHOF(charCatNames)) {

	463 return "unknown";

	464 } else {

	465 return charCatNames[cat];

	466 }

	467 }

	468

	469 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {

	470 const char *catname = getCharCatName(code);

	471 uint16_t length = 0;

	472

	473 UChar32 cp;

	474 int ndigits, i;

	475

	476 WRITE_CHAR(buffer, bufferLength, length, '<');

	477 while (catname[length - 1]) {

	478 WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);

	479 }

	480 WRITE_CHAR(buffer, bufferLength, length, '-');

	481 for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)

	482 ;

	483 if (ndigits < 4)

	484 ndigits = 4;

	485 for (cp = code, i = ndigits; (cp \|\| i > 0) && bufferLength; cp >>= 4, buffer Length--) {

	486 uint8_t v = (uint8_t)(cp & 0xf);

	487 buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);

	488 }

	489 buffer += ndigits;

	490 length += ndigits;

	491 WRITE_CHAR(buffer, bufferLength, length, '>');

	492

	493 return length;

	494 }

	495

	496 /*

	497 * getGroup() does a binary search for the group that contains the

	498 * Unicode code point "code".

	499 * The return value is always a valid Group* that may contain "code"

	500 * or else is the highest group before "code".

	501 * If the lowest group is after "code", then that one is returned.

	502 */

	503 static const uint16_t *

	504 getGroup(UCharNames *names, uint32_t code) {

	505 const uint16_t *groups=GET_GROUPS(names);

	506 uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),

	507 start=0,

	508 limit=*groups++,

	509 number;

	510

	511 /* binary search for the group of names that contains the one for code */

	512 while(start<limit-1) {

	513 number=(uint16_t)((start+limit)/2);

	514 if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {

	515 limit=number;

	516 } else {

	517 start=number;

	518 }

	519 }

	520

	521 /* return this regardless of whether it is an exact match */

	522 return groups+start*GROUP_LENGTH;

	523 }

	524

	525 /*

	526 * expandGroupLengths() reads a block of compressed lengths of 32 strings and

	527 * expands them into offsets and lengths for each string.

	528 * Lengths are stored with a variable-width encoding in consecutive nibbles:

	529 * If a nibble<0xc, then it is the length itself (0=empty string).

	530 * If a nibble>=0xc, then it forms a length value with the following nibble.

	531 * Calculation see below.

	532 * The offsets and lengths arrays must be at least 33 (one more) long because

	533 * there is no check here at the end if the last nibble is still used.

	534 */

	535 static const uint8_t *

	536 expandGroupLengths(const uint8_t *s,

	537 uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_P ER_GROUP+1]) {

	538 /* read the lengths of the 32 strings in this group and get each string's of fset */

	539 uint16_t i=0, offset=0, length=0;

	540 uint8_t lengthByte;

	541

	542 /* all 32 lengths must be read to get the offset of the first group string * /

	543 while(i<LINES_PER_GROUP) {

	544 lengthByte=*s++;

	545

	546 /* read even nibble - MSBs of lengthByte */

	547 if(length>=12) {

	548 /* double-nibble length spread across two bytes */

	549 length=(uint16_t)(((length&0x3)<<4\|lengthByte>>4)+12);

	550 lengthByte&=0xf;

	551 } else if((lengthByte /* &0xf0 */)>=0xc0) {

	552 /* double-nibble length spread across this one byte */

	553 length=(uint16_t)((lengthByte&0x3f)+12);

	554 } else {

	555 /* single-nibble length in MSBs */

	556 length=(uint16_t)(lengthByte>>4);

	557 lengthByte&=0xf;

	558 }

	559

	560 *offsets++=offset;

	561 *lengths++=length;

	562

	563 offset+=length;

	564 ++i;

	565

	566 /* read odd nibble - LSBs of lengthByte */

	567 if((lengthByte&0xf0)==0) {

	568 /* this nibble was not consumed for a double-nibble length above */

	569 length=lengthByte;

	570 if(length<12) {

	571 /* single-nibble length in LSBs */

	572 *offsets++=offset;

	573 *lengths++=length;

	574

	575 offset+=length;

	576 ++i;

	577 }

	578 } else {

	579 length=0; /* prevent double-nibble detection in the next iteration */

	580 }

	581 }

	582

	583 /* now, s is at the first group string */

	584 return s;

	585 }

	586

	587 static uint16_t

	588 expandGroupName(UCharNames names, const uint16_t group,

	589 uint16_t lineNumber, UCharNameChoice nameChoice,

	590 char *buffer, uint16_t bufferLength) {

	591 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];

	592 const uint8_t s=(uint8_t )names+names->groupStringOffset+GET_GROUP_OFFSET( group);

	593 s=expandGroupLengths(s, offsets, lengths);

	594 return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameCho ice,

	595 buffer, bufferLength);

	596 }

	597

	598 static uint16_t

	599 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,

	600 char *buffer, uint16_t bufferLength) {

	601 const uint16_t *group=getGroup(names, code);

	602 if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {

	603 return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameCh oice,

	604 buffer, bufferLength);

	605 } else {

	606 /* group not found */

	607 /* zero-terminate */

	608 if(bufferLength>0) {

	609 *buffer=0;

	610 }

	611 return 0;

	612 }

	613 }

	614

	615 /*

	616 * enumGroupNames() enumerates all the names in a 32-group

	617 * and either calls the enumerator function or finds a given input name.

	618 */

	619 static UBool

	620 enumGroupNames(UCharNames names, const uint16_t group,

	621 UChar32 start, UChar32 end,

	622 UEnumCharNamesFn fn, void context,

	623 UCharNameChoice nameChoice) {

	624 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];

	625 const uint8_t s=(uint8_t )names+names->groupStringOffset+GET_GROUP_OFFSET( group);

	626

	627 s=expandGroupLengths(s, offsets, lengths);

	628 if(fn!=DO_FIND_NAME) {

	629 char buffer[200];

	630 uint16_t length;

	631

	632 while(start<=end) {

	633 length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start& GROUP_MASK], nameChoice, buffer, sizeof(buffer));

	634 if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {

	635 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;

	636 }

	637 /* here, we assume that the buffer is large enough */

	638 if(length>0) {

	639 if(!fn(context, start, nameChoice, buffer, length)) {

	640 return FALSE;

	641 }

	642 }

	643 ++start;

	644 }

	645 } else {

	646 const char otherName=((FindName )context)->otherName;

	647 while(start<=end) {

	648 if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GRO UP_MASK], nameChoice, otherName)) {

	649 ((FindName *)context)->code=start;

	650 return FALSE;

	651 }

	652 ++start;

	653 }

	654 }

	655 return TRUE;

	656 }

	657

	658 /*

	659 * enumExtNames enumerate extended names.

	660 * It only needs to do it if it is called with a real function and not

	661 * with the dummy DO_FIND_NAME, because u_charFromName() does a check

	662 * for extended names by itself.

	663 */

	664 static UBool

	665 enumExtNames(UChar32 start, UChar32 end,

	666 UEnumCharNamesFn fn, void context)

	667 {

	668 if(fn!=DO_FIND_NAME) {

	669 char buffer[200];

	670 uint16_t length;

	671

	672 while(start<=end) {

	673 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;

	674 /* here, we assume that the buffer is large enough */

	675 if(length>0) {

	676 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {

	677 return FALSE;

	678 }

	679 }

	680 ++start;

	681 }

	682 }

	683

	684 return TRUE;

	685 }

	686

	687 static UBool

	688 enumNames(UCharNames *names,

	689 UChar32 start, UChar32 limit,

	690 UEnumCharNamesFn fn, void context,

	691 UCharNameChoice nameChoice) {

	692 uint16_t startGroupMSB, endGroupMSB, groupCount;

	693 const uint16_t group, groupLimit;

	694

	695 startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);

	696 endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);

	697

	698 /* find the group that contains start, or the highest before it */

	699 group=getGroup(names, start);

	700

	701 if(startGroupMSB==endGroupMSB) {

	702 if(startGroupMSB==group[GROUP_MSB]) {

	703 /* if start and limit-1 are in the same group, then enumerate only i n that one */

	704 return enumGroupNames(names, group, start, limit-1, fn, context, nam eChoice);

	705 }

	706 } else {

	707 const uint16_t *groups=GET_GROUPS(names);

	708 groupCount=*groups++;

	709 groupLimit=groups+groupCount*GROUP_LENGTH;

	710

	711 if(startGroupMSB==group[GROUP_MSB]) {

	712 /* enumerate characters in the partial start group */

	713 if((start&GROUP_MASK)!=0) {

	714 if(!enumGroupNames(names, group,

	715 start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+ LINES_PER_GROUP-1,

	716 fn, context, nameChoice)) {

	717 return FALSE;

	718 }

	719 group=NEXT_GROUP(group); /* continue with the next group */

	720 }

	721 } else if(startGroupMSB>group[GROUP_MSB]) {

	722 /* make sure that we start enumerating with the first group after st art */

	723 const uint16_t *nextGroup=NEXT_GROUP(group);

	724 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB & & nameChoice == U_EXTENDED_CHAR_NAME) {

	725 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;

	726 if (end > limit) {

	727 end = limit;

	728 }

	729 if (!enumExtNames(start, end - 1, fn, context)) {

	730 return FALSE;

	731 }

	732 }

	733 group=nextGroup;

	734 }

	735

	736 /* enumerate entire groups between the start- and end-groups */

	737 while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {

	738 const uint16_t *nextGroup;

	739 start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;

	740 if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {

	741 return FALSE;

	742 }

	743 nextGroup=NEXT_GROUP(group);

	744 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB ] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {

	745 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;

	746 if (end > limit) {

	747 end = limit;

	748 }

	749 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1 , fn, context)) {

	750 return FALSE;

	751 }

	752 }

	753 group=nextGroup;

	754 }

	755

	756 /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */

	757 if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {

	758 return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);

	759 } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {

	760 UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;

	761 if (next > start) {

	762 start = next;

	763 }

	764 } else {

	765 return TRUE;

	766 }

	767 }

	768

	769 /* we have not found a group, which means everything is made of

	770 extended names. */

	771 if (nameChoice == U_EXTENDED_CHAR_NAME) {

	772 if (limit > UCHAR_MAX_VALUE + 1) {

	773 limit = UCHAR_MAX_VALUE + 1;

	774 }

	775 return enumExtNames(start, limit - 1, fn, context);

	776 }

	777

	778 return TRUE;

	779 }

	780

	/*
	 * Write the factorized suffix of an algorithmic name: code is split into
	 * count indexes by modulo arithmetic with the factors, and each index
	 * selects one string from the consecutive NUL-terminated element lists.
	 *
	 * @param factors      the count factors
	 * @param count        number of factors, at least 1
	 * @param s            suffix elements: for each factor, that many
	 *                     NUL-terminated strings
	 * @param code         value to factorize (code point minus range start)
	 * @param indexes      output: the index chosen for each factor
	 * @param elementBases output, may be NULL: first element string per factor
	 * @param elements     output, may be NULL: chosen element string per factor
	 * @param buffer       output buffer, may be shorter than the suffix
	 * @param bufferLength capacity of buffer
	 * @return the full length of the suffix, without the NUL
	 */
	static uint16_t
	writeFactorSuffix(const uint16_t *factors, uint16_t count,
	                  const char *s, /* suffix elements */
	                  uint32_t code,
	                  uint16_t indexes[8], /* output fields from here */
	                  const char *elementBases[8], const char *elements[8],
	                  char *buffer, uint16_t bufferLength) {
	    uint16_t i, factor, bufferPos=0;
	    char c;

	    /* write elements according to the factors */

	    /*
	     * the factorized elements are determined by modulo arithmetic
	     * with the factors of this algorithm
	     *
	     * note that for fewer operations, count is decremented here
	     */
	    --count;
	    for(i=count; i>0; --i) {
	        factor=factors[i];
	        indexes[i]=(uint16_t)(code%factor);
	        code/=factor;
	    }
	    /*
	     * we don't need to calculate the last modulus because start<=code<=end
	     * guarantees here that code<=factors[0]
	     */
	    indexes[0]=(uint16_t)code;

	    /* write each element; i is 0 here after the loop above */
	    for(;;) {
	        if(elementBases!=NULL) {
	            *elementBases++=s;
	        }

	        /* skip indexes[i] strings */
	        factor=indexes[i];
	        while(factor>0) {
	            while(*s++!=0) {}
	            --factor;
	        }
	        if(elements!=NULL) {
	            *elements++=s;
	        }

	        /* write element */
	        while((c=*s++)!=0) {
	            WRITE_CHAR(buffer, bufferLength, bufferPos, c);
	        }

	        /* we do not need to perform the rest of this loop for i==count - break here */
	        if(i>=count) {
	            break;
	        }

	        /* skip the rest of the strings for this factors[i] */
	        factor=(uint16_t)(factors[i]-indexes[i]-1);
	        while(factor>0) {
	            while(*s++!=0) {}
	            --factor;
	        }

	        ++i;
	    }

	    /* zero-terminate */
	    if(bufferLength>0) {
	        *buffer=0;
	    }

	    return bufferPos;
	}

	854

	855 /*

	856 * Important:

	857 * Parts of findAlgName() are almost the same as some of getAlgName().

	858 * Fixes must be applied to both.

	859 */

	860 static uint16_t

	861 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,

	862 char *buffer, uint16_t bufferLength) {

	863 uint16_t bufferPos=0;

	864

	865 /* Only the normative character name can be algorithmic. */

	866 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

	867 /* zero-terminate */

	868 if(bufferLength>0) {

	869 *buffer=0;

	870 }

	871 return 0;

	872 }

	873

	874 switch(range->type) {

	875 case 0: {

	876 /* name = prefix hex-digits */

	877 const char s=(const char )(range+1);

	878 char c;

	879

	880 uint16_t i, count;

	881

	882 /* copy prefix */

	883 while((c=*s++)!=0) {

	884 WRITE_CHAR(buffer, bufferLength, bufferPos, c);

	885 }

	886

	887 /* write hexadecimal code point value */

	888 count=range->variant;

	889

	890 /* zero-terminate */

	891 if(count<bufferLength) {

	892 buffer[count]=0;

	893 }

	894

	895 for(i=count; i>0;) {

	896 if(--i<bufferLength) {

	897 c=(char)(code&0xf);

	898 if(c<10) {

	899 c+='0';

	900 } else {

	901 c+='A'-10;

	902 }

	903 buffer[i]=c;

	904 }

	905 code>>=4;

	906 }

	907

	908 bufferPos+=count;

	909 break;

	910 }

	911 case 1: {

	912 /* name = prefix factorized-elements */

	913 uint16_t indexes[8];

	914 const uint16_t factors=(const uint16_t )(range+1);

	915 uint16_t count=range->variant;

	916 const char s=(const char )(factors+count);

	917 char c;

	918

	919 /* copy prefix */

	920 while((c=*s++)!=0) {

	921 WRITE_CHAR(buffer, bufferLength, bufferPos, c);

	922 }

	923

	924 bufferPos+=writeFactorSuffix(factors, count,

	925 s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);

	926 break;

	927 }

	928 default:

	929 /* undefined type */

	930 /* zero-terminate */

	931 if(bufferLength>0) {

	932 *buffer=0;

	933 }

	934 break;

	935 }

	936

	937 return bufferPos;

	938 }

	939

	940 /*

	941 * Important: enumAlgNames() and findAlgName() are almost the same.

	942 * Any fix must be applied to both.

	943 */

	944 static UBool

	945 enumAlgNames(AlgorithmicRange *range,

	946 UChar32 start, UChar32 limit,

	947 UEnumCharNamesFn fn, void context,

	948 UCharNameChoice nameChoice) {

	949 char buffer[200];

	950 uint16_t length;

	951

	952 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

	953 return TRUE;

	954 }

	955

	956 switch(range->type) {

	957 case 0: {

	958 char s, end;

	959 char c;

	960

	961 /* get the full name of the start character */

	962 length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buf fer));

	963 if(length<=0) {

	964 return TRUE;

	965 }

	966

	967 /* call the enumerator function with this first character */

	968 if(!fn(context, start, nameChoice, buffer, length)) {

	969 return FALSE;

	970 }

	971

	972 /* go to the end of the name; all these names have the same length */

	973 end=buffer;

	974 while(*end!=0) {

	975 ++end;

	976 }

	977

	978 /* enumerate the rest of the names */

	979 while(++start<limit) {

	980 /* increment the hexadecimal number on a character-basis */

	981 s=end;

	982 for (;;) {

	983 c=*--s;

	984 if(('0'<=c && c<'9') \|\| ('A'<=c && c<'F')) {

	985 *s=(char)(c+1);

	986 break;

	987 } else if(c=='9') {

	988 *s='A';

	989 break;

	990 } else if(c=='F') {

	991 *s='0';

	992 }

	993 }

	994

	995 if(!fn(context, start, nameChoice, buffer, length)) {

	996 return FALSE;

	997 }

	998 }

	999 break;

	1000 }

	1001 case 1: {

	1002 uint16_t indexes[8];

	1003 const char elementBases[8], elements[8];

	1004 const uint16_t factors=(const uint16_t )(range+1);

	1005 uint16_t count=range->variant;

	1006 const char s=(const char )(factors+count);

	1007 char suffix, t;

	1008 uint16_t prefixLength, i, idx;

	1009

	1010 char c;

	1011

	1012 /* name = prefix factorized-elements */

	1013

	1014 /* copy prefix */

	1015 suffix=buffer;

	1016 prefixLength=0;

	1017 while((c=*s++)!=0) {

	1018 *suffix++=c;

	1019 ++prefixLength;

	1020 }

	1021

	1022 /* append the suffix of the start character */

	1023 length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,

	1024 s, (uint32_t)start-range->start,

	1025 indexes, elementBases, elements,

	1026 suffix, (uint16_t)(sizeof(buffer)- prefixLength)));

	1027

	1028 /* call the enumerator function with this first character */

	1029 if(!fn(context, start, nameChoice, buffer, length)) {

	1030 return FALSE;

	1031 }

	1032

	1033 /* enumerate the rest of the names */

	1034 while(++start<limit) {

	1035 /* increment the indexes in lexical order bound by the factors */

	1036 i=count;

	1037 for (;;) {

	1038 idx=(uint16_t)(indexes[--i]+1);

	1039 if(idx<factors[i]) {

	1040 /* skip one index and its element string */

	1041 indexes[i]=idx;

	1042 s=elements[i];

	1043 while(*s++!=0) {

	1044 }

	1045 elements[i]=s;

	1046 break;

	1047 } else {

	1048 /* reset this index to 0 and its element string to the first one */

	1049 indexes[i]=0;

	1050 elements[i]=elementBases[i];

	1051 }

	1052 }

	1053

	1054 /* to make matters a little easier, just append all elements to the suffix */

	1055 t=suffix;

	1056 length=prefixLength;

	1057 for(i=0; i<count; ++i) {

	1058 s=elements[i];

	1059 while((c=*s++)!=0) {

	1060 *t++=c;

	1061 ++length;

	1062 }

	1063 }

	1064 /* zero-terminate */

	1065 *t=0;

	1066

	1067 if(!fn(context, start, nameChoice, buffer, length)) {

	1068 return FALSE;

	1069 }

	1070 }

	1071 break;

	1072 }

	1073 default:

	1074 /* undefined type */

	1075 break;

	1076 }

	1077

	1078 return TRUE;

	1079 }

	1080

	1081 /*

	1082 * findAlgName() is almost the same as enumAlgNames() except that it

	1083 * returns the code point for a name if it fits into the range.

	1084 * It returns 0xffff otherwise.

	1085 */

	1086 static UChar32

	1087 findAlgName(AlgorithmicRange range, UCharNameChoice nameChoice, const char oth erName) {

	1088 UChar32 code;

	1089

	1090 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

	1091 return 0xffff;

	1092 }

	1093

	1094 switch(range->type) {

	1095 case 0: {

	1096 /* name = prefix hex-digits */

	1097 const char s=(const char )(range+1);

	1098 char c;

	1099

	1100 uint16_t i, count;

	1101

	1102 /* compare prefix */

	1103 while((c=*s++)!=0) {

	1104 if((char)c!=*otherName++) {

	1105 return 0xffff;

	1106 }

	1107 }

	1108

	1109 /* read hexadecimal code point value */

	1110 count=range->variant;

	1111 code=0;

	1112 for(i=0; i<count; ++i) {

	1113 c=*otherName++;

	1114 if('0'<=c && c<='9') {

	1115 code=(code<<4)\|(c-'0');

	1116 } else if('A'<=c && c<='F') {

	1117 code=(code<<4)\|(c-'A'+10);

	1118 } else {

	1119 return 0xffff;

	1120 }

	1121 }

	1122

	1123 /* does it fit into the range? */

	1124 if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=rang e->end) {

	1125 return code;

	1126 }

	1127 break;

	1128 }

	1129 case 1: {

	1130 char buffer[64];

	1131 uint16_t indexes[8];

	1132 const char elementBases[8], elements[8];

	1133 const uint16_t factors=(const uint16_t )(range+1);

	1134 uint16_t count=range->variant;

	1135 const char s=(const char )(factors+count), *t;

	1136 UChar32 start, limit;

	1137 uint16_t i, idx;

	1138

	1139 char c;

	1140

	1141 /* name = prefix factorized-elements */

	1142

	1143 /* compare prefix */

	1144 while((c=*s++)!=0) {

	1145 if((char)c!=*otherName++) {

	1146 return 0xffff;

	1147 }

	1148 }

	1149

	1150 start=(UChar32)range->start;

	1151 limit=(UChar32)(range->end+1);

	1152

	1153 /* initialize the suffix elements for enumeration; indexes should all be set to 0 */

	1154 writeFactorSuffix(factors, count, s, 0,

	1155 indexes, elementBases, elements, buffer, sizeof(buffer ));

	1156

	1157 /* compare the first suffix */

	1158 if(0==uprv_strcmp(otherName, buffer)) {

	1159 return start;

	1160 }

	1161

	1162 /* enumerate and compare the rest of the suffixes */

	1163 while(++start<limit) {

	1164 /* increment the indexes in lexical order bound by the factors */

	1165 i=count;

	1166 for (;;) {

	1167 idx=(uint16_t)(indexes[--i]+1);

	1168 if(idx<factors[i]) {

	1169 /* skip one index and its element string */

	1170 indexes[i]=idx;

	1171 s=elements[i];

	1172 while(*s++!=0) {}

	1173 elements[i]=s;

	1174 break;

	1175 } else {

	1176 /* reset this index to 0 and its element string to the first one */

	1177 indexes[i]=0;

	1178 elements[i]=elementBases[i];

	1179 }

	1180 }

	1181

	1182 /* to make matters a little easier, just compare all elements of the suffix */

	1183 t=otherName;

	1184 for(i=0; i<count; ++i) {

	1185 s=elements[i];

	1186 while((c=*s++)!=0) {

	1187 if(c!=*t++) {

	1188 s=""; /* does not match */

	1189 i=99;

	1190 }

	1191 }

	1192 }

	1193 if(i<99 && *t==0) {

	1194 return start;

	1195 }

	1196 }

	1197 break;

	1198 }

	1199 default:

	1200 /* undefined type */

	1201 break;

	1202 }

	1203

	1204 return 0xffff;

	1205 }

	1206

	1207 /* sets of name characters, maximum name lengths ---------------------------- */

	1208

	/*
	 * A "set" is an array of 8 uint32_t words used as a 256-bit map, one bit per
	 * byte value: word index = byte>>5, bit index = byte&0x1f.
	 * The argument c is parenthesized so that expressions can be passed safely.
	 */
	#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
	#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)

	/*
	 * Adds every byte of the NUL-terminated string s to the bit set and
	 * returns the number of bytes before the terminating NUL.
	 */
	static int32_t
	calcStringSetLength(uint32_t set[8], const char *s) {
	    int32_t length=0;
	    char c;

	    while((c=*s++)!=0) {
	        SET_ADD(set, c);
	        ++length;
	    }
	    return length;
	}

	1223

	1224 static int32_t

	1225 calcAlgNameSetsLengths(int32_t maxNameLength) {

	1226 AlgorithmicRange *range;

	1227 uint32_t *p;

	1228 uint32_t rangeCount;

	1229 int32_t length;

	1230

	1231 /* enumerate algorithmic ranges */

	1232 p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);

	1233 rangeCount=*p;

	1234 range=(AlgorithmicRange *)(p+1);

	1235 while(rangeCount>0) {

	1236 switch(range->type) {

	1237 case 0:

	1238 /* name = prefix + (range->variant times) hex-digits */

	1239 /* prefix */

	1240 length=calcStringSetLength(gNameSet, (const char *)(range+1))+range- >variant;

	1241 if(length>maxNameLength) {

	1242 maxNameLength=length;

	1243 }

	1244 break;

	1245 case 1: {

	1246 /* name = prefix factorized-elements */

	1247 const uint16_t factors=(const uint16_t )(range+1);

	1248 const char *s;

	1249 int32_t i, count=range->variant, factor, factorLength, maxFactorLeng th;

	1250

	1251 /* prefix length */

	1252 s=(const char *)(factors+count);

	1253 length=calcStringSetLength(gNameSet, s);

	1254 s+=length+1; /* start of factor suffixes */

	1255

	1256 /* get the set and maximum factor suffix length for each factor */

	1257 for(i=0; i<count; ++i) {

	1258 maxFactorLength=0;

	1259 for(factor=factors[i]; factor>0; --factor) {

	1260 factorLength=calcStringSetLength(gNameSet, s);

	1261 s+=factorLength+1;

	1262 if(factorLength>maxFactorLength) {

	1263 maxFactorLength=factorLength;

	1264 }

	1265 }

	1266 length+=maxFactorLength;

	1267 }

	1268

	1269 if(length>maxNameLength) {

	1270 maxNameLength=length;

	1271 }

	1272 break;

	1273 }

	1274 default:

	1275 /* unknown type */

	1276 break;

	1277 }

	1278

	1279 range=(AlgorithmicRange )((uint8_t )range+range->size);

	1280 --rangeCount;

	1281 }

	1282 return maxNameLength;

	1283 }

	1284

	1285 static int32_t

	1286 calcExtNameSetsLengths(int32_t maxNameLength) {

	1287 int32_t i, length;

	1288

	1289 for(i=0; i<LENGTHOF(charCatNames); ++i) {

	1290 /*

	1291 * for each category, count the length of the category name

	1292 * plus 9=

	1293 * 2 for <>

	1294 * 1 for -

	1295 * 6 for most hex digits per code point

	1296 */

	1297 length=9+calcStringSetLength(gNameSet, charCatNames[i]);

	1298 if(length>maxNameLength) {

	1299 maxNameLength=length;

	1300 }

	1301 }

	1302 return maxNameLength;

	1303 }

	1304

	/*
	 * Measures one ';'-terminated (or lineLimit-terminated) compressed name
	 * starting at *pLine, adding every character of the expanded name to set.
	 *
	 * tokens/tokenCount - token table; an entry of (uint16_t)-2 marks a lead
	 *                     byte, (uint16_t)-1 a byte that stands for itself,
	 *                     anything else is an offset into tokenStrings
	 * tokenLengths      - optional cache of token word lengths indexed by token
	 *                     number (0 means "not computed yet"); may be NULL
	 * pLine             - in: start of the name; out: position after the name
	 *                     (after the ';' if one was consumed)
	 * Returns the length of the expanded name.
	 */
	static int32_t
	calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
	                  uint32_t set[8],
	                  const uint8_t **pLine, const uint8_t *lineLimit) {
	    const uint8_t *line=*pLine;
	    int32_t length=0, tokenLength;
	    uint16_t c, token;

	    while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
	        if(c>=tokenCount) {
	            /* implicit letter */
	            SET_ADD(set, c);
	            ++length;
	        } else {
	            token=tokens[c];
	            if(token==(uint16_t)(-2)) {
	                /* this is a lead byte for a double-byte token */
	                c=c<<8|*line++;
	                token=tokens[c];
	            }
	            if(token==(uint16_t)(-1)) {
	                /* explicit letter */
	                SET_ADD(set, c);
	                ++length;
	            } else {
	                /* count token word */
	                if(tokenLengths!=NULL) {
	                    /* use cached token length */
	                    tokenLength=tokenLengths[c];
	                    if(tokenLength==0) {
	                        tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
	                        tokenLengths[c]=(int8_t)tokenLength;
	                    }
	                } else {
	                    tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
	                }
	                length+=tokenLength;
	            }
	        }
	    }

	    *pLine=line;
	    return length;
	}

	1349

	1350 static void

	1351 calcGroupNameSetsLengths(int32_t maxNameLength) {

	1352 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];

	1353

	1354 uint16_t tokens=(uint16_t )uCharNames+8;

	1355 uint16_t tokenCount=*tokens++;

	1356 uint8_t tokenStrings=(uint8_t )uCharNames+uCharNames->tokenStringOffset;

	1357

	1358 int8_t *tokenLengths;

	1359

	1360 const uint16_t *group;

	1361 const uint8_t s, line, *lineLimit;

	1362

	1363 int32_t groupCount, lineNumber, length;

	1364

	1365 tokenLengths=(int8_t *)uprv_malloc(tokenCount);

	1366 if(tokenLengths!=NULL) {

	1367 uprv_memset(tokenLengths, 0, tokenCount);

	1368 }

	1369

	1370 group=GET_GROUPS(uCharNames);

	1371 groupCount=*group++;

	1372

	1373 /* enumerate all groups */

	1374 while(groupCount>0) {

	1375 s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(g roup);

	1376 s=expandGroupLengths(s, offsets, lengths);

	1377

	1378 /* enumerate all lines in each group */

	1379 for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {

	1380 line=s+offsets[lineNumber];

	1381 length=lengths[lineNumber];

	1382 if(length==0) {

	1383 continue;

	1384 }

	1385

	1386 lineLimit=line+length;

	1387

	1388 /* read regular name */

	1389 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLeng ths, gNameSet, &line, lineLimit);

	1390 if(length>maxNameLength) {

	1391 maxNameLength=length;

	1392 }

	1393 if(line==lineLimit) {

	1394 continue;

	1395 }

	1396

	1397 /* read Unicode 1.0 name */

	1398 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLeng ths, gNameSet, &line, lineLimit);

	1399 if(length>maxNameLength) {

	1400 maxNameLength=length;

	1401 }

	1402 if(line==lineLimit) {

	1403 continue;

	1404 }

	1405

	1406 /* read ISO comment */

	1407 /length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLe ngths, gISOCommentSet, &line, lineLimit);/

	1408 }

	1409

	1410 group=NEXT_GROUP(group);

	1411 --groupCount;

	1412 }

	1413

	1414 if(tokenLengths!=NULL) {

	1415 uprv_free(tokenLengths);

	1416 }

	1417

	1418 /* set gMax... - name length last for threading */

	1419 gMaxNameLength=maxNameLength;

	1420 }

	1421

	1422 static UBool

	1423 calcNameSetsLengths(UErrorCode *pErrorCode) {

	1424 static const char extChars[]="0123456789ABCDEF<>-";

	1425 int32_t i, maxNameLength;

	1426

	1427 if(gMaxNameLength!=0) {

	1428 return TRUE;

	1429 }

	1430

	1431 if(!isDataLoaded(pErrorCode)) {

	1432 return FALSE;

	1433 }

	1434

	1435 /* set hex digits, used in various names, and <>-, used in extended names */

	1436 for(i=0; i<sizeof(extChars)-1; ++i) {

	1437 SET_ADD(gNameSet, extChars[i]);

	1438 }

	1439

	1440 /* set sets and lengths from algorithmic names */

	1441 maxNameLength=calcAlgNameSetsLengths(0);

	1442

	1443 /* set sets and lengths from extended names */

	1444 maxNameLength=calcExtNameSetsLengths(maxNameLength);

	1445

	1446 /* set sets and lengths from group names, set global maximum values */

	1447 calcGroupNameSetsLengths(maxNameLength);

	1448

	1449 return TRUE;

	1450 }

	1451

	1452 /* public API --------------------------------------------------------------- */

	1453

	1454 U_CAPI int32_t U_EXPORT2

	1455 u_charName(UChar32 code, UCharNameChoice nameChoice,

	1456 char *buffer, int32_t bufferLength,

	1457 UErrorCode *pErrorCode) {

	1458 AlgorithmicRange *algRange;

	1459 uint32_t *p;

	1460 uint32_t i;

	1461 int32_t length;

	1462

	1463 /* check the argument values */

	1464 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	1465 return 0;

	1466 } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\|

	1467 bufferLength<0 \|\| (bufferLength>0 && buffer==NULL)

	1468 ) {

	1469 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	1470 return 0;

	1471 }

	1472

	1473 if((uint32_t)code>UCHAR_MAX_VALUE \|\| !isDataLoaded(pErrorCode)) {

	1474 return u_terminateChars(buffer, bufferLength, 0, pErrorCode);

	1475 }

	1476

	1477 length=0;

	1478

	1479 /* try algorithmic names first */

	1480 p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);

	1481 i=*p;

	1482 algRange=(AlgorithmicRange *)(p+1);

	1483 while(i>0) {

	1484 if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {

	1485 length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uin t16_t)bufferLength);

	1486 break;

	1487 }

	1488 algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);

	1489 --i;

	1490 }

	1491

	1492 if(i==0) {

	1493 if (nameChoice == U_EXTENDED_CHAR_NAME) {

	1494 length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);

	1495 if (!length) {

	1496 /* extended character name */

	1497 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLe ngth);

	1498 }

	1499 } else {

	1500 /* normal character name */

	1501 length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint 16_t)bufferLength);

	1502 }

	1503 }

	1504

	1505 return u_terminateChars(buffer, bufferLength, length, pErrorCode);

	1506 }

	1507

	1508 U_CAPI int32_t U_EXPORT2

	1509 u_getISOComment(UChar32 c,

	1510 char *dest, int32_t destCapacity,

	1511 UErrorCode *pErrorCode) {

	1512 int32_t length;

	1513

	1514 /* check the argument values */

	1515 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	1516 return 0;

	1517 } else if(destCapacity<0 \|\| (destCapacity>0 && dest==NULL)) {

	1518 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	1519 return 0;

	1520 }

	1521

	1522 if((uint32_t)c>UCHAR_MAX_VALUE \|\| !isDataLoaded(pErrorCode)) {

	1523 return u_terminateChars(dest, destCapacity, 0, pErrorCode);

	1524 }

	1525

	1526 /* the ISO comment is stored like a normal character name */

	1527 length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destC apacity);

	1528 return u_terminateChars(dest, destCapacity, length, pErrorCode);

	1529 }

	1530

	1531 U_CAPI UChar32 U_EXPORT2

	1532 u_charFromName(UCharNameChoice nameChoice,

	1533 const char *name,

	1534 UErrorCode *pErrorCode) {

	1535 char upper[120], lower[120];

	1536 FindName findName;

	1537 AlgorithmicRange *algRange;

	1538 uint32_t *p;

	1539 uint32_t i;

	1540 UChar32 cp = 0;

	1541 char c0;

	1542 UChar32 error = 0xffff; /* Undefined, but use this for backwards compati bility. */

	1543

	1544 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	1545 return error;

	1546 }

	1547

	1548 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\| name==NULL \|\| *name==0) {

	1549 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	1550 return error;

	1551 }

	1552

	1553 if(!isDataLoaded(pErrorCode)) {

	1554 return error;

	1555 }

	1556

	1557 /* construct the uppercase and lowercase of the name first */

	1558 for(i=0; i<sizeof(upper); ++i) {

	1559 if((c0=*name++)!=0) {

	1560 upper[i]=uprv_toupper(c0);

	1561 lower[i]=uprv_tolower(c0);

	1562 } else {

	1563 upper[i]=lower[i]=0;

	1564 break;

	1565 }

	1566 }

	1567 if(i==sizeof(upper)) {

	1568 /* name too long, there is no such character */

	1569 *pErrorCode = U_ILLEGAL_CHAR_FOUND;

	1570 return error;

	1571 }

	1572

	1573 /* try extended names first */

	1574 if (lower[0] == '<') {

	1575 if (nameChoice == U_EXTENDED_CHAR_NAME) {

	1576 if (lower[--i] == '>') {

	1577 for (--i; lower[i] && lower[i] != '-'; --i) {

	1578 }

	1579

	1580 if (lower[i] == '-') { /* We've got a category. */

	1581 uint32_t cIdx;

	1582

	1583 lower[i] = 0;

	1584

	1585 for (++i; lower[i] != '>'; ++i) {

	1586 if (lower[i] >= '0' && lower[i] <= '9') {

	1587 cp = (cp << 4) + lower[i] - '0';

	1588 } else if (lower[i] >= 'a' && lower[i] <= 'f') {

	1589 cp = (cp << 4) + lower[i] - 'a' + 10;

	1590 } else {

	1591 *pErrorCode = U_ILLEGAL_CHAR_FOUND;

	1592 return error;

	1593 }

	1594 }

	1595

	1596 /* Now validate the category name.

	1597 We could use a binary search, or a trie, if

	1598 we really wanted to. */

	1599

	1600 for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {

	1601

	1602 if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {

	1603 if (getCharCat(cp) == cIdx) {

	1604 return cp;

	1605 }

	1606 break;

	1607 }

	1608 }

	1609 }

	1610 }

	1611 }

	1612

	1613 *pErrorCode = U_ILLEGAL_CHAR_FOUND;

	1614 return error;

	1615 }

	1616

	1617 /* try algorithmic names now */

	1618 p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);

	1619 i=*p;

	1620 algRange=(AlgorithmicRange *)(p+1);

	1621 while(i>0) {

	1622 if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {

	1623 return cp;

	1624 }

	1625 algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);

	1626 --i;

	1627 }

	1628

	1629 /* normal character name */

	1630 findName.otherName=upper;

	1631 findName.code=error;

	1632 enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameC hoice);

	1633 if (findName.code == error) {

	1634 *pErrorCode = U_ILLEGAL_CHAR_FOUND;

	1635 }

	1636 return findName.code;

	1637 }

	1638

	1639 U_CAPI void U_EXPORT2

	1640 u_enumCharNames(UChar32 start, UChar32 limit,

	1641 UEnumCharNamesFn *fn,

	1642 void *context,

	1643 UCharNameChoice nameChoice,

	1644 UErrorCode *pErrorCode) {

	1645 AlgorithmicRange *algRange;

	1646 uint32_t *p;

	1647 uint32_t i;

	1648

	1649 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	1650 return;

	1651 }

	1652

	1653 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\| fn==NULL) {

	1654 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	1655 return;

	1656 }

	1657

	1658 if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {

	1659 limit = UCHAR_MAX_VALUE + 1;

	1660 }

	1661 if((uint32_t)start>=(uint32_t)limit) {

	1662 return;

	1663 }

	1664

	1665 if(!isDataLoaded(pErrorCode)) {

	1666 return;

	1667 }

	1668

	1669 /* interleave the data-driven ones with the algorithmic ones */

	1670 /* iterate over all algorithmic ranges; assume that they are in ascending or der */

	1671 p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);

	1672 i=*p;

	1673 algRange=(AlgorithmicRange *)(p+1);

	1674 while(i>0) {

	1675 /* enumerate the character names before the current algorithmic range */

	1676 /* here: start<limit */

	1677 if((uint32_t)start<algRange->start) {

	1678 if((uint32_t)limit<=algRange->start) {

	1679 enumNames(uCharNames, start, limit, fn, context, nameChoice);

	1680 return;

	1681 }

	1682 if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, conte xt, nameChoice)) {

	1683 return;

	1684 }

	1685 start=(UChar32)algRange->start;

	1686 }

	1687 /* enumerate the character names in the current algorithmic range */

	1688 /* here: algRange->start<=start<limit */

	1689 if((uint32_t)start<=algRange->end) {

	1690 if((uint32_t)limit<=(algRange->end+1)) {

	1691 enumAlgNames(algRange, start, limit, fn, context, nameChoice);

	1692 return;

	1693 }

	1694 if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, cont ext, nameChoice)) {

	1695 return;

	1696 }

	1697 start=(UChar32)algRange->end+1;

	1698 }

	1699 /* continue to the next algorithmic range (here: start<limit) */

	1700 algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);

	1701 --i;

	1702 }

	1703 /* enumerate the character names after the last algorithmic range */

	1704 enumNames(uCharNames, start, limit, fn, context, nameChoice);

	1705 }

	1706

	1707 U_CAPI int32_t U_EXPORT2

	1708 uprv_getMaxCharNameLength() {

	1709 UErrorCode errorCode=U_ZERO_ERROR;

	1710 if(calcNameSetsLengths(&errorCode)) {

	1711 return gMaxNameLength;

	1712 } else {

	1713 return 0;

	1714 }

	1715 }

	1716

	1717 /**

	1718 * Converts the char set cset into a Unicode set uset.

	1719 * @param cset Set of 256 bit flags corresponding to a set of chars.

	1720 * @param uset USet to receive characters. Existing contents are deleted.

	1721 */

	1722 static void

	1723 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {

	1724 UChar us[256];

	1725 char cs[256];

	1726

	1727 int32_t i, length;

	1728 UErrorCode errorCode;

	1729

	1730 errorCode=U_ZERO_ERROR;

	1731

	1732 if(!calcNameSetsLengths(&errorCode)) {

	1733 return;

	1734 }

	1735

	1736 /* build a char string with all chars that are used in character names */

	1737 length=0;

	1738 for(i=0; i<256; ++i) {

	1739 if(SET_CONTAINS(cset, i)) {

	1740 cs[length++]=(char)i;

	1741 }

	1742 }

	1743

	1744 /* convert the char string to a UChar string */

	1745 u_charsToUChars(cs, us, length);

	1746

	1747 /* add each UChar to the USet */

	1748 for(i=0; i<length; ++i) {

	1749 if(us[i]!=0 \|\| cs[i]==0) { /* non-invariant chars become (UChar)0 */

	1750 sa->add(sa->set, us[i]);

	1751 }

	1752 }

	1753 }

	1754

	1755 /**

	1756 * Fills set with characters that are used in Unicode character names.

	1757 * @param set USet to receive characters.

	1758 */

	1759 U_CAPI void U_EXPORT2

	1760 uprv_getCharNameCharacters(const USetAdder *sa) {

	1761 charSetToUSet(gNameSet, sa);

	1762 }

	1763

	1764 /* data swapping ------------------------------------------------------------ */

	1765

	1766 /*

	1767 * The token table contains non-negative entries for token bytes,

	1768 * and -1 for bytes that represent themselves in the data file's charset.

	1769 * -2 entries are used for lead bytes.

	1770 *

	1771 * Direct bytes (-1 entries) must be translated from the input charset family

	1772 * to the output charset family.

	1773 * makeTokenMap() writes a permutation mapping for this.

	1774 * Use it once for single-/lead-byte tokens and once more for all trail byte

	1775 * tokens. (';' is an unused trail byte marked with -1.)

	1776 */

	1777 static void

	1778 makeTokenMap(const UDataSwapper *ds,

	1779 int16_t tokens[], uint16_t tokenCount,

	1780 uint8_t map[256],

	1781 UErrorCode *pErrorCode) {

	1782 UBool usedOutChar[256];

	1783 uint16_t i, j;

	1784 uint8_t c1, c2;

	1785

	1786 if(U_FAILURE(*pErrorCode)) {

	1787 return;

	1788 }

	1789

	1790 if(ds->inCharset==ds->outCharset) {

	1791 /* Same charset family: identity permutation */

	1792 for(i=0; i<256; ++i) {

	1793 map[i]=(uint8_t)i;

	1794 }

	1795 } else {

	1796 uprv_memset(map, 0, 256);

	1797 uprv_memset(usedOutChar, 0, 256);

	1798

	1799 if(tokenCount>256) {

	1800 tokenCount=256;

	1801 }

	1802

	1803 /* set the direct bytes (byte 0 always maps to itself) */

	1804 for(i=1; i<tokenCount; ++i) {

	1805 if(tokens[i]==-1) {

	1806 /* convert the direct byte character */

	1807 c1=(uint8_t)i;

	1808 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);

	1809 if(U_FAILURE(*pErrorCode)) {

	1810 udata_printError(ds, "unames/makeTokenMap() finds variant ch aracter 0x%02x used (input charset family %d)\n",

	1811 i, ds->inCharset);

	1812 return;

	1813 }

	1814

	1815 /* enter the converted character into the map and mark it used * /

	1816 map[c1]=c2;

	1817 usedOutChar[c2]=TRUE;

	1818 }

	1819 }

	1820

	1821 /* set the mappings for the rest of the permutation */

	1822 for(i=j=1; i<tokenCount; ++i) {

	1823 /* set mappings that were not set for direct bytes */

	1824 if(map[i]==0) {

	1825 /* set an output byte value that was not used as an output byte above */

	1826 while(usedOutChar[j]) {

	1827 ++j;

	1828 }

	1829 map[i]=(uint8_t)j++;

	1830 }

	1831 }

	1832

	1833 /*

	1834 * leave mappings at tokenCount and above unset if tokenCount<256

	1835 * because they won't be used

	1836 */

	1837 }

	1838 }

	1839

	1840 U_CAPI int32_t U_EXPORT2

	1841 uchar_swapNames(const UDataSwapper *ds,

	1842 const void inData, int32_t length, void outData,

	1843 UErrorCode *pErrorCode) {

	1844 const UDataInfo *pInfo;

	1845 int32_t headerSize;

	1846

	1847 const uint8_t *inBytes;

	1848 uint8_t *outBytes;

	1849

	1850 uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,

	1851 offset, i, count, stringsCount;

	1852

	1853 const AlgorithmicRange *inRange;

	1854 AlgorithmicRange *outRange;

	1855

	1856 /* udata_swapDataHeader checks the arguments */

	1857 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

	1858 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	1859 return 0;

	1860 }

	1861

	1862 /* check data format and format version */

	1863 pInfo=(const UDataInfo )((const char )inData+4);

	1864 if(!(

	1865 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */

	1866 pInfo->dataFormat[1]==0x6e &&

	1867 pInfo->dataFormat[2]==0x61 &&

	1868 pInfo->dataFormat[3]==0x6d &&

	1869 pInfo->formatVersion[0]==1

	1870 )) {

	1871 udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",

	1872 pInfo->dataFormat[0], pInfo->dataFormat[1],

	1873 pInfo->dataFormat[2], pInfo->dataFormat[3],

	1874 pInfo->formatVersion[0]);

	1875 *pErrorCode=U_UNSUPPORTED_ERROR;

	1876 return 0;

	1877 }

	1878

	1879 inBytes=(const uint8_t *)inData+headerSize;

	1880 outBytes=(uint8_t *)outData+headerSize;

	1881 if(length<0) {

	1882 algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);

	1883 } else {

	1884 length-=headerSize;

	1885 if( length<20 \|\|

	1886 (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)i nBytes)[3]))

	1887 ) {

	1888 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after hea der) for unames.icu\n",

	1889 length);

	1890 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	1891 return 0;

	1892 }

	1893 }

	1894

	1895 if(length<0) {

	1896 /* preflighting: iterate through algorithmic ranges */

	1897 offset=algNamesOffset;

	1898 count=ds->readUInt32(((const uint32_t )(inBytes+offset)));

	1899 offset+=4;

	1900

	1901 for(i=0; i<count; ++i) {

	1902 inRange=(const AlgorithmicRange *)(inBytes+offset);

	1903 offset+=ds->readUInt16(inRange->size);

	1904 }

	1905 } else {

	1906 /* swap data */

	1907 const uint16_t *p;

	1908 uint16_t q, temp;

	1909

	1910 int16_t tokens[512];

	1911 uint16_t tokenCount;

	1912

	1913 uint8_t map[256], trailMap[256];

	1914

	1915 /* copy the data for inaccessible bytes */

	1916 if(inBytes!=outBytes) {

	1917 uprv_memcpy(outBytes, inBytes, length);

	1918 }

	1919

	1920 /* the initial 4 offsets first */

	1921 tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);

	1922 groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);

	1923 groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);

	1924 ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);

	1925

	1926 /*

	1927 * now the tokens table

	1928 * it needs to be permutated along with the compressed name strings

	1929 */

	1930 p=(const uint16_t *)(inBytes+16);

	1931 q=(uint16_t *)(outBytes+16);

	1932

	1933 /* read and swap the tokenCount */

	1934 tokenCount=ds->readUInt16(*p);

	1935 ds->swapArray16(ds, p, 2, q, pErrorCode);

	1936 ++p;

	1937 ++q;

	1938

	1939 /* read the first 512 tokens and make the token maps */

	1940 if(tokenCount<=512) {

	1941 count=tokenCount;

	1942 } else {

	1943 count=512;

	1944 }

	1945 for(i=0; i<count; ++i) {

	1946 tokens[i]=udata_readInt16(ds, p[i]);

	1947 }

	1948 for(; i<512; ++i) {

	1949 tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */

	1950 }

	1951 makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);

	1952 makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);

	1953 if(U_FAILURE(*pErrorCode)) {

	1954 return 0;

	1955 }

	1956

	1957 /*

	1958 * swap and permutate the tokens

	1959 * go through a temporary array to support in-place swapping

	1960 */

	1961 temp=(uint16_t )uprv_malloc(tokenCount2);

	1962 if(temp==NULL) {

	1963 udata_printError(ds, "out of memory swapping %u unames.icu tokens\n" ,

	1964 tokenCount);

	1965 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

	1966 return 0;

	1967 }

	1968

	1969 /* swap and permutate single-/lead-byte tokens */

	1970 for(i=0; i<tokenCount && i<256; ++i) {

	1971 ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);

	1972 }

	1973

	1974 /* swap and permutate trail-byte tokens */

	1975 for(; i<tokenCount; ++i) {

	1976 ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pE rrorCode);

	1977 }

	1978

	1979 /* copy the result into the output and free the temporary array */

	1980 uprv_memcpy(q, temp, tokenCount*2);

	1981 uprv_free(temp);

	1982

	1983 /*

	1984 * swap the token strings but not a possible padding byte after

	1985 * the terminating NUL of the last string

	1986 */

	1987 udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groups Offset-tokenStringOffset),

	1988 outBytes+tokenStringOffset, pErrorCode);

	1989 if(U_FAILURE(*pErrorCode)) {

	1990 udata_printError(ds, "uchar_swapNames(token strings) failed\n");

	1991 return 0;

	1992 }

	1993

	1994 /* swap the group table */

	1995 count=ds->readUInt16(((const uint16_t )(inBytes+groupsOffset)));

	1996 ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count3)2),

	1997 outBytes+groupsOffset, pErrorCode);

	1998

	1999 /*

	2000 * swap the group strings

	2001 * swap the string bytes but not the nibble-encoded string lengths

	2002 */

	2003 if(ds->inCharset!=ds->outCharset) {

	2004 uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];

	2005

	2006 const uint8_t inStrings, nextInStrings;

	2007 uint8_t *outStrings;

	2008

	2009 uint8_t c;

	2010

	2011 inStrings=inBytes+groupStringOffset;

	2012 outStrings=outBytes+groupStringOffset;

	2013

	2014 stringsCount=algNamesOffset-groupStringOffset;

	2015

	2016 /* iterate through string groups until only a few padding bytes are left */

	2017 while(stringsCount>32) {

	2018 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);

	2019

	2020 /* move past the length bytes */

	2021 stringsCount-=(uint32_t)(nextInStrings-inStrings);

	2022 outStrings+=nextInStrings-inStrings;

	2023 inStrings=nextInStrings;

	2024

	2025 count=offsets[31]+lengths[31]; /* total number of string bytes i n this group */

	2026 stringsCount-=count;

	2027

	2028 /* swap the string bytes using map[] and trailMap[] */

	2029 while(count>0) {

	2030 c=*inStrings++;

	2031 *outStrings++=map[c];

	2032 if(tokens[c]!=-2) {

	2033 --count;

	2034 } else {

	2035 /* token lead byte: swap the trail byte, too */

	2036 outStrings++=trailMap[inStrings++];

	2037 count-=2;

	2038 }

	2039 }

	2040 }

	2041 }

	2042

	2043 /* swap the algorithmic ranges */

	2044 offset=algNamesOffset;

	2045 count=ds->readUInt32(((const uint32_t )(inBytes+offset)));

	2046 ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);

	2047 offset+=4;

	2048

	2049 for(i=0; i<count; ++i) {

	2050 if(offset>(uint32_t)length) {

	2051 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",

	2052 length, i);

	2053 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	2054 return 0;

	2055 }

	2056

	2057 inRange=(const AlgorithmicRange *)(inBytes+offset);

	2058 outRange=(AlgorithmicRange *)(outBytes+offset);

	2059 offset+=ds->readUInt16(inRange->size);

	2060

	2061 ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);

	2062 ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);

	2063 switch(inRange->type) {

	2064 case 0:

	2065 /* swap prefix string */

	2066 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),

	2067 outRange+1, pErrorCode);

	2068 if(U_FAILURE(*pErrorCode)) {

	2069 udata_printError(ds, "uchar_swapNames(prefix string of algor ithmic range %u) failed\n",

	2070 i);

	2071 return 0;

	2072 }

	2073 break;

	2074 case 1:

	2075 {

	2076 /* swap factors and the prefix and factor strings */

	2077 uint32_t factorsCount;

	2078

	2079 factorsCount=inRange->variant;

	2080 p=(const uint16_t *)(inRange+1);

	2081 q=(uint16_t *)(outRange+1);

	2082 ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorC ode);

	2083

	2084 /* swap the strings, up to the last terminating NUL */

	2085 p+=factorsCount;

	2086 q+=factorsCount;

	2087 stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p) ;

	2088 while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1] !=0) {

	2089 --stringsCount;

	2090 }

	2091 ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode );

	2092 }

	2093 break;

	2094 default:

	2095 udata_printError(ds, "uchar_swapNames(): unknown type %u of algo rithmic range %u\n",

	2096 inRange->type, i);

	2097 *pErrorCode=U_UNSUPPORTED_ERROR;

	2098 return 0;

	2099 }

	2100 }

	2101 }

	2102

	2103 return headerSize+(int32_t)offset;

	2104 }

	2105

	2106 /*

	2107 * Hey, Emacs, please set the following:

	2108 *

	2109 * Local Variables:

	2110 * indent-tabs-mode: nil

	2111 * End:

	2112 *

	2113 */

OLD	NEW

« no previous file with comments | « icu46/source/common/umutex.c ('k') | icu46/source/common/unicode/brkiter.h » ('j') | no next file with comments »