OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * |
| 4 * Copyright (C) 2001-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ****************************************************************************** |
| 8 * file name: utrie2.cpp |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2008aug16 (starting from a copy of utrie.c) |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * This is a common implementation of a Unicode trie. |
| 17 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with |
| 18 * Unicode code points (0..0x10ffff). |
| 19 * This is the second common version of a Unicode trie (hence the name UTrie2). |
| 20 * See utrie2.h for a comparison. |
| 21 * |
| 22 * This file contains only the runtime and enumeration code, for read-only access. |
| 23 * See utrie2_builder.c for the builder code. |
| 24 */ |
| 25 #ifdef UTRIE2_DEBUG |
| 26 # include <stdio.h> |
| 27 #endif |
| 28 |
| 29 #include "unicode/utypes.h" |
| 30 #include "cmemory.h" |
| 31 #include "utrie2.h" |
| 32 #include "utrie2_impl.h" |
| 33 |
| 34 /* Public UTrie2 API implementation ----------------------------------------- */ |
| 35 |
| 36 static uint32_t |
| 37 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { |
| 38 int32_t i2, block; |
| 39 |
| 40 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { |
| 41 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; |
| 42 } |
| 43 |
| 44 if(U_IS_LEAD(c) && fromLSCP) { |
| 45 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ |
| 46 (c>>UTRIE2_SHIFT_2); |
| 47 } else { |
| 48 i2=trie->index1[c>>UTRIE2_SHIFT_1]+ |
| 49 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); |
| 50 } |
| 51 block=trie->index2[i2]; |
| 52 return trie->data[block+(c&UTRIE2_DATA_MASK)]; |
| 53 } |
| 54 |
| 55 U_CAPI uint32_t U_EXPORT2 |
| 56 utrie2_get32(const UTrie2 *trie, UChar32 c) { |
| 57 if(trie->data16!=NULL) { |
| 58 return UTRIE2_GET16(trie, c); |
| 59 } else if(trie->data32!=NULL) { |
| 60 return UTRIE2_GET32(trie, c); |
| 61 } else if((uint32_t)c>0x10ffff) { |
| 62 return trie->errorValue; |
| 63 } else { |
| 64 return get32(trie->newTrie, c, TRUE); |
| 65 } |
| 66 } |
| 67 |
| 68 U_CAPI uint32_t U_EXPORT2 |
| 69 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { |
| 70 if(!U_IS_LEAD(c)) { |
| 71 return trie->errorValue; |
| 72 } |
| 73 if(trie->data16!=NULL) { |
| 74 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); |
| 75 } else if(trie->data32!=NULL) { |
| 76 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); |
| 77 } else { |
| 78 return get32(trie->newTrie, c, FALSE); |
| 79 } |
| 80 } |
| 81 |
| 82 static U_INLINE int32_t |
| 83 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { |
| 84 int32_t idx= |
| 85 _UTRIE2_INDEX_FROM_CP( |
| 86 trie, |
| 87 trie->data32==NULL ? trie->indexLength : 0, |
| 88 c); |
| 89 return (idx<<3)|i; |
| 90 } |
| 91 |
| 92 U_CAPI int32_t U_EXPORT2 |
| 93 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, |
| 94 const uint8_t *src, const uint8_t *limit) { |
| 95 int32_t i, length; |
| 96 i=0; |
| 97 /* support 64-bit pointers by avoiding cast of arbitrary difference */ |
| 98 if((limit-src)<=7) { |
| 99 length=(int32_t)(limit-src); |
| 100 } else { |
| 101 length=7; |
| 102 } |
| 103 c=utf8_nextCharSafeBody(src, &i, length, c, -1); |
| 104 return u8Index(trie, c, i); |
| 105 } |
| 106 |
| 107 U_CAPI int32_t U_EXPORT2 |
| 108 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, |
| 109 const uint8_t *start, const uint8_t *src) { |
| 110 int32_t i, length; |
| 111 /* support 64-bit pointers by avoiding cast of arbitrary difference */ |
| 112 if((src-start)<=7) { |
| 113 i=length=(int32_t)(src-start); |
| 114 } else { |
| 115 i=length=7; |
| 116 start=src-7; |
| 117 } |
| 118 c=utf8_prevCharSafeBody(start, 0, &i, c, -1); |
| 119 i=length-i; /* number of bytes read backward from src */ |
| 120 return u8Index(trie, c, i); |
| 121 } |
| 122 |
| 123 U_CAPI UTrie2 * U_EXPORT2 |
| 124 utrie2_openFromSerialized(UTrie2ValueBits valueBits, |
| 125 const void *data, int32_t length, int32_t *pActualLeng
th, |
| 126 UErrorCode *pErrorCode) { |
| 127 const UTrie2Header *header; |
| 128 const uint16_t *p16; |
| 129 int32_t actualLength; |
| 130 |
| 131 UTrie2 tempTrie; |
| 132 UTrie2 *trie; |
| 133 |
| 134 if(U_FAILURE(*pErrorCode)) { |
| 135 return 0; |
| 136 } |
| 137 |
| 138 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || |
| 139 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits |
| 140 ) { |
| 141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 142 return 0; |
| 143 } |
| 144 |
| 145 /* enough data for a trie header? */ |
| 146 if(length<(int32_t)sizeof(UTrie2Header)) { |
| 147 *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 148 return 0; |
| 149 } |
| 150 |
| 151 /* check the signature */ |
| 152 header=(const UTrie2Header *)data; |
| 153 if(header->signature!=UTRIE2_SIG) { |
| 154 *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 155 return 0; |
| 156 } |
| 157 |
| 158 /* get the options */ |
| 159 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MA
SK)) { |
| 160 *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 161 return 0; |
| 162 } |
| 163 |
| 164 /* get the length values and offsets */ |
| 165 uprv_memset(&tempTrie, 0, sizeof(tempTrie)); |
| 166 tempTrie.indexLength=header->indexLength; |
| 167 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
| 168 tempTrie.index2NullOffset=header->index2NullOffset; |
| 169 tempTrie.dataNullOffset=header->dataNullOffset; |
| 170 |
| 171 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; |
| 172 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; |
| 173 if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 174 tempTrie.highValueIndex+=tempTrie.indexLength; |
| 175 } |
| 176 |
| 177 /* calculate the actual length */ |
| 178 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; |
| 179 if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 180 actualLength+=tempTrie.dataLength*2; |
| 181 } else { |
| 182 actualLength+=tempTrie.dataLength*4; |
| 183 } |
| 184 if(length<actualLength) { |
| 185 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ |
| 186 return 0; |
| 187 } |
| 188 |
| 189 /* allocate the trie */ |
| 190 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
| 191 if(trie==NULL) { |
| 192 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 193 return 0; |
| 194 } |
| 195 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); |
| 196 trie->memory=(uint32_t *)data; |
| 197 trie->length=actualLength; |
| 198 trie->isMemoryOwned=FALSE; |
| 199 |
| 200 /* set the pointers to its index and data arrays */ |
| 201 p16=(const uint16_t *)(header+1); |
| 202 trie->index=p16; |
| 203 p16+=trie->indexLength; |
| 204 |
| 205 /* get the data */ |
| 206 switch(valueBits) { |
| 207 case UTRIE2_16_VALUE_BITS: |
| 208 trie->data16=p16; |
| 209 trie->data32=NULL; |
| 210 trie->initialValue=trie->index[trie->dataNullOffset]; |
| 211 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; |
| 212 break; |
| 213 case UTRIE2_32_VALUE_BITS: |
| 214 trie->data16=NULL; |
| 215 trie->data32=(const uint32_t *)p16; |
| 216 trie->initialValue=trie->data32[trie->dataNullOffset]; |
| 217 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; |
| 218 break; |
| 219 default: |
| 220 *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 221 return 0; |
| 222 } |
| 223 |
| 224 if(pActualLength!=NULL) { |
| 225 *pActualLength=actualLength; |
| 226 } |
| 227 return trie; |
| 228 } |
| 229 |
| 230 U_CAPI UTrie2 * U_EXPORT2 |
| 231 utrie2_openDummy(UTrie2ValueBits valueBits, |
| 232 uint32_t initialValue, uint32_t errorValue, |
| 233 UErrorCode *pErrorCode) { |
| 234 UTrie2 *trie; |
| 235 UTrie2Header *header; |
| 236 uint32_t *p; |
| 237 uint16_t *dest16; |
| 238 int32_t indexLength, dataLength, length, i; |
| 239 int32_t dataMove; /* >0 if the data is moved to the end of the index array
*/ |
| 240 |
| 241 if(U_FAILURE(*pErrorCode)) { |
| 242 return 0; |
| 243 } |
| 244 |
| 245 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { |
| 246 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 247 return 0; |
| 248 } |
| 249 |
| 250 /* calculate the total length of the dummy trie data */ |
| 251 indexLength=UTRIE2_INDEX_1_OFFSET; |
| 252 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; |
| 253 length=(int32_t)sizeof(UTrie2Header)+indexLength*2; |
| 254 if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 255 length+=dataLength*2; |
| 256 } else { |
| 257 length+=dataLength*4; |
| 258 } |
| 259 |
| 260 /* allocate the trie */ |
| 261 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
| 262 if(trie==NULL) { |
| 263 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 264 return 0; |
| 265 } |
| 266 uprv_memset(trie, 0, sizeof(UTrie2)); |
| 267 trie->memory=uprv_malloc(length); |
| 268 if(trie->memory==NULL) { |
| 269 uprv_free(trie); |
| 270 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 271 return 0; |
| 272 } |
| 273 trie->length=length; |
| 274 trie->isMemoryOwned=TRUE; |
| 275 |
| 276 /* set the UTrie2 fields */ |
| 277 if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 278 dataMove=indexLength; |
| 279 } else { |
| 280 dataMove=0; |
| 281 } |
| 282 |
| 283 trie->indexLength=indexLength; |
| 284 trie->dataLength=dataLength; |
| 285 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; |
| 286 trie->dataNullOffset=(uint16_t)dataMove; |
| 287 trie->initialValue=initialValue; |
| 288 trie->errorValue=errorValue; |
| 289 trie->highStart=0; |
| 290 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; |
| 291 |
| 292 /* set the header fields */ |
| 293 header=(UTrie2Header *)trie->memory; |
| 294 |
| 295 header->signature=UTRIE2_SIG; /* "Tri2" */ |
| 296 header->options=(uint16_t)valueBits; |
| 297 |
| 298 header->indexLength=(uint16_t)indexLength; |
| 299 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); |
| 300 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; |
| 301 header->dataNullOffset=(uint16_t)dataMove; |
| 302 header->shiftedHighStart=0; |
| 303 |
| 304 /* fill the index and data arrays */ |
| 305 dest16=(uint16_t *)(header+1); |
| 306 trie->index=dest16; |
| 307 |
| 308 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ |
| 309 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { |
| 310 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block
*/ |
| 311 } |
| 312 |
| 313 /* write UTF-8 2-byte index-2 values, not right-shifted */ |
| 314 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ |
| 315 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); |
| 316 } |
| 317 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ |
| 318 *dest16++=(uint16_t)dataMove; |
| 319 } |
| 320 |
| 321 /* write the 16/32-bit data array */ |
| 322 switch(valueBits) { |
| 323 case UTRIE2_16_VALUE_BITS: |
| 324 /* write 16-bit data values */ |
| 325 trie->data16=dest16; |
| 326 trie->data32=NULL; |
| 327 for(i=0; i<0x80; ++i) { |
| 328 *dest16++=(uint16_t)initialValue; |
| 329 } |
| 330 for(; i<0xc0; ++i) { |
| 331 *dest16++=(uint16_t)errorValue; |
| 332 } |
| 333 /* highValue and reserved values */ |
| 334 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { |
| 335 *dest16++=(uint16_t)initialValue; |
| 336 } |
| 337 break; |
| 338 case UTRIE2_32_VALUE_BITS: |
| 339 /* write 32-bit data values */ |
| 340 p=(uint32_t *)dest16; |
| 341 trie->data16=NULL; |
| 342 trie->data32=p; |
| 343 for(i=0; i<0x80; ++i) { |
| 344 *p++=initialValue; |
| 345 } |
| 346 for(; i<0xc0; ++i) { |
| 347 *p++=errorValue; |
| 348 } |
| 349 /* highValue and reserved values */ |
| 350 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { |
| 351 *p++=initialValue; |
| 352 } |
| 353 break; |
| 354 default: |
| 355 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 356 return 0; |
| 357 } |
| 358 |
| 359 return trie; |
| 360 } |
| 361 |
| 362 U_CAPI void U_EXPORT2 |
| 363 utrie2_close(UTrie2 *trie) { |
| 364 if(trie!=NULL) { |
| 365 if(trie->isMemoryOwned) { |
| 366 uprv_free(trie->memory); |
| 367 } |
| 368 if(trie->newTrie!=NULL) { |
| 369 uprv_free(trie->newTrie->data); |
| 370 uprv_free(trie->newTrie); |
| 371 } |
| 372 uprv_free(trie); |
| 373 } |
| 374 } |
| 375 |
| 376 U_CAPI int32_t U_EXPORT2 |
| 377 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { |
| 378 uint32_t signature; |
| 379 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { |
| 380 return 0; |
| 381 } |
| 382 signature=*(const uint32_t *)data; |
| 383 if(signature==UTRIE2_SIG) { |
| 384 return 2; |
| 385 } |
| 386 if(anyEndianOk && signature==UTRIE2_OE_SIG) { |
| 387 return 2; |
| 388 } |
| 389 if(signature==UTRIE_SIG) { |
| 390 return 1; |
| 391 } |
| 392 if(anyEndianOk && signature==UTRIE_OE_SIG) { |
| 393 return 1; |
| 394 } |
| 395 return 0; |
| 396 } |
| 397 |
| 398 U_CAPI int32_t U_EXPORT2 |
| 399 utrie2_swap(const UDataSwapper *ds, |
| 400 const void *inData, int32_t length, void *outData, |
| 401 UErrorCode *pErrorCode) { |
| 402 const UTrie2Header *inTrie; |
| 403 UTrie2Header trie; |
| 404 int32_t dataLength, size; |
| 405 UTrie2ValueBits valueBits; |
| 406 |
| 407 if(U_FAILURE(*pErrorCode)) { |
| 408 return 0; |
| 409 } |
| 410 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { |
| 411 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 412 return 0; |
| 413 } |
| 414 |
| 415 /* setup and swapping */ |
| 416 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { |
| 417 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 418 return 0; |
| 419 } |
| 420 |
| 421 inTrie=(const UTrie2Header *)inData; |
| 422 trie.signature=ds->readUInt32(inTrie->signature); |
| 423 trie.options=ds->readUInt16(inTrie->options); |
| 424 trie.indexLength=ds->readUInt16(inTrie->indexLength); |
| 425 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); |
| 426 |
| 427 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); |
| 428 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
| 429 |
| 430 if( trie.signature!=UTRIE2_SIG || |
| 431 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || |
| 432 trie.indexLength<UTRIE2_INDEX_1_OFFSET || |
| 433 dataLength<UTRIE2_DATA_START_OFFSET |
| 434 ) { |
| 435 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ |
| 436 return 0; |
| 437 } |
| 438 |
| 439 size=sizeof(UTrie2Header)+trie.indexLength*2; |
| 440 switch(valueBits) { |
| 441 case UTRIE2_16_VALUE_BITS: |
| 442 size+=dataLength*2; |
| 443 break; |
| 444 case UTRIE2_32_VALUE_BITS: |
| 445 size+=dataLength*4; |
| 446 break; |
| 447 default: |
| 448 *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 449 return 0; |
| 450 } |
| 451 |
| 452 if(length>=0) { |
| 453 UTrie2Header *outTrie; |
| 454 |
| 455 if(length<size) { |
| 456 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 457 return 0; |
| 458 } |
| 459 |
| 460 outTrie=(UTrie2Header *)outData; |
| 461 |
| 462 /* swap the header */ |
| 463 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCo
de); |
| 464 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode)
; |
| 465 |
| 466 /* swap the index and the data */ |
| 467 switch(valueBits) { |
| 468 case UTRIE2_16_VALUE_BITS: |
| 469 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTr
ie+1, pErrorCode); |
| 470 break; |
| 471 case UTRIE2_32_VALUE_BITS: |
| 472 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorC
ode); |
| 473 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, d
ataLength*4, |
| 474 (uint16_t *)(outTrie+1)+trie.indexLength, p
ErrorCode); |
| 475 break; |
| 476 default: |
| 477 *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 478 return 0; |
| 479 } |
| 480 } |
| 481 |
| 482 return size; |
| 483 } |
| 484 |
| 485 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c |
| 486 // to avoid a dependency from utrie2.cpp on utrie.c. |
| 487 |
| 488 /* enumeration -------------------------------------------------------------- */ |
| 489 |
/* Smaller of a and b; yields b when they compare equal. Arguments may be evaluated twice. */
#define MIN_VALUE(a, b) ((b)>(a) ? (a) : (b))
| 491 |
| 492 /* default UTrie2EnumValue() returns the input value itself */ |
| 493 static uint32_t U_CALLCONV |
| 494 enumSameValue(const void * /*context*/, uint32_t value) { |
| 495 return value; |
| 496 } |
| 497 |
| 498 /** |
| 499 * Enumerate all ranges of code points with the same relevant values. |
| 500 * The values are transformed from the raw trie entries by the enumValue functio
n. |
| 501 * |
| 502 * Currently requires start<limit and both start and limit must be multiples |
| 503 * of UTRIE2_DATA_BLOCK_LENGTH. |
| 504 * |
| 505 * Optimizations: |
| 506 * - Skip a whole block if we know that it is filled with a single value, |
| 507 * and it is the same as we visited just before. |
| 508 * - Handle the null block specially because we know a priori that it is filled |
| 509 * with a single value. |
| 510 */ |
| 511 static void |
| 512 enumEitherTrie(const UTrie2 *trie, |
| 513 UChar32 start, UChar32 limit, |
| 514 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const voi
d *context) { |
| 515 const uint32_t *data32; |
| 516 const uint16_t *idx; |
| 517 |
| 518 uint32_t value, prevValue, initialValue; |
| 519 UChar32 c, prev, highStart; |
| 520 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlo
ck; |
| 521 |
| 522 if(enumRange==NULL) { |
| 523 return; |
| 524 } |
| 525 if(enumValue==NULL) { |
| 526 enumValue=enumSameValue; |
| 527 } |
| 528 |
| 529 if(trie->newTrie==NULL) { |
| 530 /* frozen trie */ |
| 531 idx=trie->index; |
| 532 data32=trie->data32; |
| 533 |
| 534 index2NullOffset=trie->index2NullOffset; |
| 535 nullBlock=trie->dataNullOffset; |
| 536 } else { |
| 537 /* unfrozen, mutable trie */ |
| 538 idx=NULL; |
| 539 data32=trie->newTrie->data; |
| 540 |
| 541 index2NullOffset=trie->newTrie->index2NullOffset; |
| 542 nullBlock=trie->newTrie->dataNullOffset; |
| 543 } |
| 544 |
| 545 highStart=trie->highStart; |
| 546 |
| 547 /* get the enumeration value that corresponds to an initial-value trie data
entry */ |
| 548 initialValue=enumValue(context, trie->initialValue); |
| 549 |
| 550 /* set variables for previous range */ |
| 551 prevI2Block=-1; |
| 552 prevBlock=-1; |
| 553 prev=start; |
| 554 prevValue=0; |
| 555 |
| 556 /* enumerate index-2 blocks */ |
| 557 for(c=start; c<limit && c<highStart;) { |
| 558 /* Code point limit for iterating inside this i2Block. */ |
| 559 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; |
| 560 if(limit<tempLimit) { |
| 561 tempLimit=limit; |
| 562 } |
| 563 if(c<=0xffff) { |
| 564 if(!U_IS_SURROGATE(c)) { |
| 565 i2Block=c>>UTRIE2_SHIFT_2; |
| 566 } else if(U_IS_SURROGATE_LEAD(c)) { |
| 567 /* |
| 568 * Enumerate values for lead surrogate code points, not code uni
ts: |
| 569 * This special block has half the normal length. |
| 570 */ |
| 571 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; |
| 572 tempLimit=MIN_VALUE(0xdc00, limit); |
| 573 } else { |
| 574 /* |
| 575 * Switch back to the normal part of the index-2 table. |
| 576 * Enumerate the second half of the surrogates block. |
| 577 */ |
| 578 i2Block=0xd800>>UTRIE2_SHIFT_2; |
| 579 tempLimit=MIN_VALUE(0xe000, limit); |
| 580 } |
| 581 } else { |
| 582 /* supplementary code points */ |
| 583 if(idx!=NULL) { |
| 584 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LE
NGTH)+ |
| 585 (c>>UTRIE2_SHIFT_1)]; |
| 586 } else { |
| 587 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; |
| 588 } |
| 589 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { |
| 590 /* |
| 591 * The index-2 block is the same as the previous one, and filled
with prevValue. |
| 592 * Only possible for supplementary code points because the linea
r-BMP index-2 |
| 593 * table creates unique i2Block values. |
| 594 */ |
| 595 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; |
| 596 continue; |
| 597 } |
| 598 } |
| 599 prevI2Block=i2Block; |
| 600 if(i2Block==index2NullOffset) { |
| 601 /* this is the null index-2 block */ |
| 602 if(prevValue!=initialValue) { |
| 603 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
| 604 return; |
| 605 } |
| 606 prevBlock=nullBlock; |
| 607 prev=c; |
| 608 prevValue=initialValue; |
| 609 } |
| 610 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; |
| 611 } else { |
| 612 /* enumerate data blocks for one index-2 block */ |
| 613 int32_t i2, i2Limit; |
| 614 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
| 615 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { |
| 616 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
| 617 } else { |
| 618 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; |
| 619 } |
| 620 for(; i2<i2Limit; ++i2) { |
| 621 if(idx!=NULL) { |
| 622 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; |
| 623 } else { |
| 624 block=trie->newTrie->index2[i2Block+i2]; |
| 625 } |
| 626 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { |
| 627 /* the block is the same as the previous one, and filled wit
h prevValue */ |
| 628 c+=UTRIE2_DATA_BLOCK_LENGTH; |
| 629 continue; |
| 630 } |
| 631 prevBlock=block; |
| 632 if(block==nullBlock) { |
| 633 /* this is the null data block */ |
| 634 if(prevValue!=initialValue) { |
| 635 if(prev<c && !enumRange(context, prev, c-1, prevValue))
{ |
| 636 return; |
| 637 } |
| 638 prev=c; |
| 639 prevValue=initialValue; |
| 640 } |
| 641 c+=UTRIE2_DATA_BLOCK_LENGTH; |
| 642 } else { |
| 643 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { |
| 644 value=enumValue(context, data32!=NULL ? data32[block+j]
: idx[block+j]); |
| 645 if(value!=prevValue) { |
| 646 if(prev<c && !enumRange(context, prev, c-1, prevValu
e)) { |
| 647 return; |
| 648 } |
| 649 prev=c; |
| 650 prevValue=value; |
| 651 } |
| 652 ++c; |
| 653 } |
| 654 } |
| 655 } |
| 656 } |
| 657 } |
| 658 |
| 659 if(c>limit) { |
| 660 c=limit; /* could be higher if in the index2NullOffset */ |
| 661 } else if(c<limit) { |
| 662 /* c==highStart<limit */ |
| 663 uint32_t highValue; |
| 664 if(idx!=NULL) { |
| 665 highValue= |
| 666 data32!=NULL ? |
| 667 data32[trie->highValueIndex] : |
| 668 idx[trie->highValueIndex]; |
| 669 } else { |
| 670 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_
GRANULARITY]; |
| 671 } |
| 672 value=enumValue(context, highValue); |
| 673 if(value!=prevValue) { |
| 674 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
| 675 return; |
| 676 } |
| 677 prev=c; |
| 678 prevValue=value; |
| 679 } |
| 680 c=limit; |
| 681 } |
| 682 |
| 683 /* deliver last range */ |
| 684 enumRange(context, prev, c-1, prevValue); |
| 685 } |
| 686 |
| 687 U_CAPI void U_EXPORT2 |
| 688 utrie2_enum(const UTrie2 *trie, |
| 689 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *
context) { |
| 690 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); |
| 691 } |
| 692 |
| 693 U_CAPI void U_EXPORT2 |
| 694 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, |
| 695 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRan
ge, |
| 696 const void *context) { |
| 697 if(!U16_IS_LEAD(lead)) { |
| 698 return; |
| 699 } |
| 700 lead=(lead-0xd7c0)<<10; /* start code point */ |
| 701 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); |
| 702 } |
| 703 |
| 704 /* C++ convenience wrappers ------------------------------------------------- */ |
| 705 |
| 706 U_NAMESPACE_BEGIN |
| 707 |
| 708 uint16_t BackwardUTrie2StringIterator::previous16() { |
| 709 codePointLimit=codePointStart; |
| 710 if(start>=codePointStart) { |
| 711 codePoint=U_SENTINEL; |
| 712 return 0; |
| 713 } |
| 714 uint16_t result; |
| 715 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); |
| 716 return result; |
| 717 } |
| 718 |
| 719 uint16_t ForwardUTrie2StringIterator::next16() { |
| 720 codePointStart=codePointLimit; |
| 721 if(codePointLimit==limit) { |
| 722 codePoint=U_SENTINEL; |
| 723 return 0; |
| 724 } |
| 725 uint16_t result; |
| 726 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); |
| 727 return result; |
| 728 } |
| 729 |
| 730 UTrie2 *UTrie2Singleton::getInstance(InstantiatorFn *instantiator, const void *c
ontext, |
| 731 UErrorCode &errorCode) { |
| 732 void *duplicate; |
| 733 UTrie2 *instance=(UTrie2 *)singleton.getInstance(instantiator, context, dupl
icate, errorCode); |
| 734 utrie2_close((UTrie2 *)duplicate); |
| 735 return instance; |
| 736 } |
| 737 |
| 738 U_NAMESPACE_END |
OLD | NEW |