| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ****************************************************************************** | |
| 3 * Copyright (C) 1996-2014, International Business Machines | |
| 4 * Corporation and others. All Rights Reserved. | |
| 5 ****************************************************************************** | |
| 6 */ | |
| 7 | |
| 8 #include "unicode/utypes.h" | |
| 9 | |
| 10 #if !UCONFIG_NO_COLLATION | |
| 11 | |
| 12 #include "unicode/unistr.h" | |
| 13 #include "unicode/usearch.h" | |
| 14 | |
| 15 #include "cmemory.h" | |
| 16 #include "unicode/coll.h" | |
| 17 #include "unicode/tblcoll.h" | |
| 18 #include "unicode/coleitr.h" | |
| 19 #include "unicode/ucoleitr.h" | |
| 20 | |
| 21 #include "unicode/regex.h" // TODO: make conditional on regexp being buil
t. | |
| 22 | |
| 23 #include "unicode/uniset.h" | |
| 24 #include "unicode/uset.h" | |
| 25 #include "unicode/usetiter.h" | |
| 26 #include "unicode/ustring.h" | |
| 27 #include "hash.h" | |
| 28 #include "normalizer2impl.h" | |
| 29 #include "uhash.h" | |
| 30 #include "usrchimp.h" | |
| 31 #include "uassert.h" | |
| 32 | |
| 33 #include "colldata.h" | |
| 34 | |
| 35 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) | |
| 36 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) | |
| 37 #define DELETE_ARRAY(array) uprv_free((void *) (array)) | |
| 38 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src),
(count) * sizeof (src)[0]) | |
| 39 | |
| 40 CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) | |
| 41 : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) | |
| 42 { | |
| 43 UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), stri
ng.length(), &status); | |
| 44 UCollationStrength strength = ucol_getStrength(coll); | |
| 45 UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==
UCOL_SHIFTED; | |
| 46 uint32_t variableTop = ucol_getVariableTop(coll, &status); | |
| 47 uint32_t strengthMask = 0; | |
| 48 int32_t order; | |
| 49 | |
| 50 if (U_FAILURE(status)) { | |
| 51 return; | |
| 52 } | |
| 53 | |
| 54 // **** only set flag if string has Han(gul) **** | |
| 55 // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476 | |
| 56 | |
| 57 switch (strength) | |
| 58 { | |
| 59 default: | |
| 60 strengthMask |= UCOL_TERTIARYORDERMASK; | |
| 61 /* fall through */ | |
| 62 | |
| 63 case UCOL_SECONDARY: | |
| 64 strengthMask |= UCOL_SECONDARYORDERMASK; | |
| 65 /* fall through */ | |
| 66 | |
| 67 case UCOL_PRIMARY: | |
| 68 strengthMask |= UCOL_PRIMARYORDERMASK; | |
| 69 } | |
| 70 | |
| 71 ces = ceBuffer; | |
| 72 | |
| 73 while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) { | |
| 74 UBool cont = isContinuation(order); | |
| 75 | |
| 76 order &= strengthMask; | |
| 77 | |
| 78 if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORD
ERMASK) != 0) { | |
| 79 if (strength >= UCOL_QUATERNARY) { | |
| 80 order &= UCOL_PRIMARYORDERMASK; | |
| 81 } else { | |
| 82 order = UCOL_IGNORABLE; | |
| 83 } | |
| 84 } | |
| 85 | |
| 86 if (order == UCOL_IGNORABLE) { | |
| 87 continue; | |
| 88 } | |
| 89 | |
| 90 if (cont) { | |
| 91 order |= UCOL_CONTINUATION_MARKER; | |
| 92 } | |
| 93 | |
| 94 add(order, status); | |
| 95 } | |
| 96 | |
| 97 ucol_closeElements(elems); | |
| 98 } | |
| 99 | |
| 100 CEList::~CEList() | |
| 101 { | |
| 102 if (ces != ceBuffer) { | |
| 103 DELETE_ARRAY(ces); | |
| 104 } | |
| 105 } | |
| 106 | |
| 107 void CEList::add(uint32_t ce, UErrorCode &status) | |
| 108 { | |
| 109 if (U_FAILURE(status)) { | |
| 110 return; | |
| 111 } | |
| 112 | |
| 113 if (listSize >= listMax) { | |
| 114 int32_t newMax = listMax + CELIST_BUFFER_SIZE; | |
| 115 uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax); | |
| 116 | |
| 117 if (newCEs == NULL) { | |
| 118 status = U_MEMORY_ALLOCATION_ERROR; | |
| 119 return; | |
| 120 } | |
| 121 | |
| 122 uprv_memcpy(newCEs, ces, listSize * sizeof(uint32_t)); | |
| 123 | |
| 124 if (ces != ceBuffer) { | |
| 125 DELETE_ARRAY(ces); | |
| 126 } | |
| 127 | |
| 128 ces = newCEs; | |
| 129 listMax = newMax; | |
| 130 } | |
| 131 | |
| 132 ces[listSize++] = ce; | |
| 133 } | |
| 134 | |
| 135 uint32_t CEList::get(int32_t index) const | |
| 136 { | |
| 137 if (index >= 0 && index < listSize) { | |
| 138 return ces[index]; | |
| 139 } | |
| 140 | |
| 141 return (uint32_t)UCOL_NULLORDER; | |
| 142 } | |
| 143 | |
| 144 uint32_t &CEList::operator[](int32_t index) const | |
| 145 { | |
| 146 return ces[index]; | |
| 147 } | |
| 148 | |
| 149 UBool CEList::matchesAt(int32_t offset, const CEList *other) const | |
| 150 { | |
| 151 if (other == NULL || listSize - offset < other->size()) { | |
| 152 return FALSE; | |
| 153 } | |
| 154 | |
| 155 for (int32_t i = offset, j = 0; j < other->size(); i += 1, j += 1) { | |
| 156 if (ces[i] != (*other)[j]) { | |
| 157 return FALSE; | |
| 158 } | |
| 159 } | |
| 160 | |
| 161 return TRUE; | |
| 162 } | |
| 163 | |
| 164 int32_t CEList::size() const | |
| 165 { | |
| 166 return listSize; | |
| 167 } | |
| 168 | |
| 169 StringList::StringList(UErrorCode &status) | |
| 170 : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0) | |
| 171 { | |
| 172 if (U_FAILURE(status)) { | |
| 173 return; | |
| 174 } | |
| 175 | |
| 176 strings = new UnicodeString [listMax]; | |
| 177 | |
| 178 if (strings == NULL) { | |
| 179 status = U_MEMORY_ALLOCATION_ERROR; | |
| 180 return; | |
| 181 } | |
| 182 } | |
| 183 | |
| 184 StringList::~StringList() | |
| 185 { | |
| 186 delete[] strings; | |
| 187 } | |
| 188 | |
| 189 void StringList::add(const UnicodeString *string, UErrorCode &status) | |
| 190 { | |
| 191 if (U_FAILURE(status)) { | |
| 192 return; | |
| 193 } | |
| 194 if (listSize >= listMax) { | |
| 195 int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE; | |
| 196 UnicodeString *newStrings = new UnicodeString[newMax]; | |
| 197 if (newStrings == NULL) { | |
| 198 status = U_MEMORY_ALLOCATION_ERROR; | |
| 199 return; | |
| 200 } | |
| 201 for (int32_t i=0; i<listSize; ++i) { | |
| 202 newStrings[i] = strings[i]; | |
| 203 } | |
| 204 delete[] strings; | |
| 205 strings = newStrings; | |
| 206 listMax = newMax; | |
| 207 } | |
| 208 | |
| 209 // The ctor initialized all the strings in | |
| 210 // the array to empty strings, so this | |
| 211 // is the same as copying the source string. | |
| 212 strings[listSize++].append(*string); | |
| 213 } | |
| 214 | |
| 215 void StringList::add(const UChar *chars, int32_t count, UErrorCode &status) | |
| 216 { | |
| 217 const UnicodeString string(chars, count); | |
| 218 | |
| 219 add(&string, status); | |
| 220 } | |
| 221 | |
| 222 const UnicodeString *StringList::get(int32_t index) const | |
| 223 { | |
| 224 if (index >= 0 && index < listSize) { | |
| 225 return &strings[index]; | |
| 226 } | |
| 227 | |
| 228 return NULL; | |
| 229 } | |
| 230 | |
| 231 int32_t StringList::size() const | |
| 232 { | |
| 233 return listSize; | |
| 234 } | |
| 235 | |
| 236 | |
| 237 U_CDECL_BEGIN | |
| 238 static void U_CALLCONV | |
| 239 deleteStringList(void *obj) | |
| 240 { | |
| 241 StringList *strings = (StringList *) obj; | |
| 242 | |
| 243 delete strings; | |
| 244 } | |
| 245 U_CDECL_END | |
| 246 | |
| 247 class CEToStringsMap | |
| 248 { | |
| 249 public: | |
| 250 CEToStringsMap(UErrorCode &status); | |
| 251 ~CEToStringsMap(); | |
| 252 | |
| 253 void put(uint32_t ce, UnicodeString *string, UErrorCode &status); | |
| 254 StringList *getStringList(uint32_t ce) const; | |
| 255 | |
| 256 private: | |
| 257 void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status); | |
| 258 UHashtable *map; | |
| 259 }; | |
| 260 | |
| 261 CEToStringsMap::CEToStringsMap(UErrorCode &status) | |
| 262 : map(NULL) | |
| 263 { | |
| 264 if (U_FAILURE(status)) { | |
| 265 return; | |
| 266 } | |
| 267 | |
| 268 map = uhash_open(uhash_hashLong, uhash_compareLong, | |
| 269 uhash_compareCaselessUnicodeString, | |
| 270 &status); | |
| 271 | |
| 272 if (U_FAILURE(status)) { | |
| 273 return; | |
| 274 } | |
| 275 | |
| 276 uhash_setValueDeleter(map, deleteStringList); | |
| 277 } | |
| 278 | |
| 279 CEToStringsMap::~CEToStringsMap() | |
| 280 { | |
| 281 uhash_close(map); | |
| 282 } | |
| 283 | |
| 284 void CEToStringsMap::put(uint32_t ce, UnicodeString *string, UErrorCode &status) | |
| 285 { | |
| 286 StringList *strings = getStringList(ce); | |
| 287 | |
| 288 if (strings == NULL) { | |
| 289 strings = new StringList(status); | |
| 290 | |
| 291 if (strings == NULL || U_FAILURE(status)) { | |
| 292 status = U_MEMORY_ALLOCATION_ERROR; | |
| 293 return; | |
| 294 } | |
| 295 | |
| 296 putStringList(ce, strings, status); | |
| 297 } | |
| 298 | |
| 299 strings->add(string, status); | |
| 300 } | |
| 301 | |
| 302 StringList *CEToStringsMap::getStringList(uint32_t ce) const | |
| 303 { | |
| 304 return (StringList *) uhash_iget(map, ce); | |
| 305 } | |
| 306 | |
| 307 void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCo
de &status) | |
| 308 { | |
| 309 uhash_iput(map, ce, (void *) stringList, &status); | |
| 310 } | |
| 311 | |
| 312 #define CLONE_COLLATOR | |
| 313 | |
| 314 CollData::CollData(UCollator *collator, UErrorCode &status) | |
| 315 : coll(NULL), ceToCharsStartingWith(NULL) | |
| 316 { | |
| 317 // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]] | |
| 318 // i.e. other, control, private use, format, surrogate | |
| 319 U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20); | |
| 320 U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20); | |
| 321 USet *charsToTest = uset_openPattern(test_pattern, 20, &status); | |
| 322 | |
| 323 // Han ext. A, Han, Jamo, Hangul, Han Ext. B | |
| 324 // i.e. all the characers we handle implicitly | |
| 325 U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\
\uD7AF][\\U00020000-\\U0002A6DF]]", 70); | |
| 326 U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\
\uD7AF][\\U00020000-\\U0002A6DF]]", 70); | |
| 327 USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status); | |
| 328 | |
| 329 if (U_FAILURE(status)) { | |
| 330 return; | |
| 331 } | |
| 332 | |
| 333 USet *expansions = uset_openEmpty(); | |
| 334 USet *contractions = uset_openEmpty(); | |
| 335 int32_t itemCount; | |
| 336 | |
| 337 ceToCharsStartingWith = new CEToStringsMap(status); | |
| 338 | |
| 339 if (U_FAILURE(status)) { | |
| 340 goto bail; | |
| 341 } | |
| 342 | |
| 343 #ifdef CLONE_COLLATOR | |
| 344 coll = ucol_safeClone(collator, NULL, NULL, &status); | |
| 345 | |
| 346 if (U_FAILURE(status)) { | |
| 347 goto bail; | |
| 348 } | |
| 349 #else | |
| 350 coll = collator; | |
| 351 #endif | |
| 352 | |
| 353 ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &st
atus); | |
| 354 | |
| 355 uset_addAll(charsToTest, contractions); | |
| 356 uset_addAll(charsToTest, expansions); | |
| 357 uset_removeAll(charsToTest, charsToRemove); | |
| 358 | |
| 359 itemCount = uset_getItemCount(charsToTest); | |
| 360 for(int32_t item = 0; item < itemCount; item += 1) { | |
| 361 UChar32 start = 0, end = 0; | |
| 362 UChar buffer[16]; | |
| 363 int32_t len = uset_getItem(charsToTest, item, &start, &end, | |
| 364 buffer, 16, &status); | |
| 365 | |
| 366 if (len == 0) { | |
| 367 for (UChar32 ch = start; ch <= end; ch += 1) { | |
| 368 UnicodeString *st = new UnicodeString(ch); | |
| 369 | |
| 370 if (st == NULL) { | |
| 371 status = U_MEMORY_ALLOCATION_ERROR; | |
| 372 break; | |
| 373 } | |
| 374 | |
| 375 CEList *ceList = new CEList(coll, *st, status); | |
| 376 | |
| 377 ceToCharsStartingWith->put(ceList->get(0), st, status); | |
| 378 | |
| 379 delete ceList; | |
| 380 delete st; | |
| 381 } | |
| 382 } else if (len > 0) { | |
| 383 UnicodeString *st = new UnicodeString(buffer, len); | |
| 384 | |
| 385 if (st == NULL) { | |
| 386 status = U_MEMORY_ALLOCATION_ERROR; | |
| 387 break; | |
| 388 } | |
| 389 | |
| 390 CEList *ceList = new CEList(coll, *st, status); | |
| 391 | |
| 392 ceToCharsStartingWith->put(ceList->get(0), st, status); | |
| 393 | |
| 394 delete ceList; | |
| 395 delete st; | |
| 396 } else { | |
| 397 // shouldn't happen... | |
| 398 } | |
| 399 | |
| 400 if (U_FAILURE(status)) { | |
| 401 break; | |
| 402 } | |
| 403 } | |
| 404 | |
| 405 bail: | |
| 406 uset_close(contractions); | |
| 407 uset_close(expansions); | |
| 408 uset_close(charsToRemove); | |
| 409 uset_close(charsToTest); | |
| 410 | |
| 411 if (U_FAILURE(status)) { | |
| 412 return; | |
| 413 } | |
| 414 | |
| 415 UnicodeSet hanRanges(UNICODE_STRING_SIMPLE("[:Unified_Ideograph:]"), status)
; | |
| 416 if (U_FAILURE(status)) { | |
| 417 return; | |
| 418 } | |
| 419 UnicodeSetIterator hanIter(hanRanges); | |
| 420 UnicodeString hanString; | |
| 421 while(hanIter.nextRange()) { | |
| 422 hanString.append(hanIter.getCodepoint()); | |
| 423 hanString.append(hanIter.getCodepointEnd()); | |
| 424 } | |
| 425 // TODO: Why U+11FF? The old code had an outdated UCOL_LAST_T_JAMO=0x11F9, | |
| 426 // but as of Unicode 6.3 the 11xx block is filled, | |
| 427 // and there are also more Jamo T at U+D7CB..U+D7FB. | |
| 428 // Maybe use [:HST=T:] and look for the end of the last range? | |
| 429 // Maybe use script boundary mappings instead of this code?? | |
| 430 UChar jamoRanges[] = {Hangul::JAMO_L_BASE, Hangul::JAMO_V_BASE, Hangul::JAM
O_T_BASE + 1, 0x11FF}; | |
| 431 UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges)); | |
| 432 CEList hanList(coll, hanString, status); | |
| 433 CEList jamoList(coll, jamoString, status); | |
| 434 int32_t j = 0; | |
| 435 | |
| 436 if (U_FAILURE(status)) { | |
| 437 return; | |
| 438 } | |
| 439 | |
| 440 for (int32_t c = 0; c < jamoList.size(); c += 1) { | |
| 441 uint32_t jce = jamoList[c]; | |
| 442 | |
| 443 if (! isContinuation(jce)) { | |
| 444 jamoLimits[j++] = jce; | |
| 445 } | |
| 446 } | |
| 447 | |
| 448 jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT); | |
| 449 | |
| 450 minHan = 0xFFFFFFFF; | |
| 451 maxHan = 0; | |
| 452 | |
| 453 for(int32_t h = 0; h < hanList.size(); h += 2) { | |
| 454 uint32_t han = (uint32_t) hanList[h]; | |
| 455 | |
| 456 if (han < minHan) { | |
| 457 minHan = han; | |
| 458 } | |
| 459 | |
| 460 if (han > maxHan) { | |
| 461 maxHan = han; | |
| 462 } | |
| 463 } | |
| 464 | |
| 465 maxHan += (1 << UCOL_PRIMARYORDERSHIFT); | |
| 466 } | |
| 467 | |
| 468 CollData::~CollData() | |
| 469 { | |
| 470 #ifdef CLONE_COLLATOR | |
| 471 ucol_close(coll); | |
| 472 #endif | |
| 473 | |
| 474 delete ceToCharsStartingWith; | |
| 475 } | |
| 476 | |
| 477 UCollator *CollData::getCollator() const | |
| 478 { | |
| 479 return coll; | |
| 480 } | |
| 481 | |
| 482 const StringList *CollData::getStringList(int32_t ce) const | |
| 483 { | |
| 484 return ceToCharsStartingWith->getStringList(ce); | |
| 485 } | |
| 486 | |
| 487 const CEList *CollData::getCEList(const UnicodeString *string) const | |
| 488 { | |
| 489 UErrorCode status = U_ZERO_ERROR; | |
| 490 const CEList *list = new CEList(coll, *string, status); | |
| 491 | |
| 492 if (U_FAILURE(status)) { | |
| 493 delete list; | |
| 494 list = NULL; | |
| 495 } | |
| 496 | |
| 497 return list; | |
| 498 } | |
| 499 | |
| 500 void CollData::freeCEList(const CEList *list) | |
| 501 { | |
| 502 delete list; | |
| 503 } | |
| 504 | |
| 505 int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t
*history) const | |
| 506 { | |
| 507 // find out shortest string for the longest sequence of ces. | |
| 508 // this can probably be folded with the minLengthCache... | |
| 509 | |
| 510 if (history[offset] >= 0) { | |
| 511 return history[offset]; | |
| 512 } | |
| 513 | |
| 514 uint32_t ce = ceList->get(offset); | |
| 515 int32_t maxOffset = ceList->size(); | |
| 516 int32_t shortestLength = INT32_MAX; | |
| 517 const StringList *strings = ceToCharsStartingWith->getStringList(ce); | |
| 518 | |
| 519 if (strings != NULL) { | |
| 520 int32_t stringCount = strings->size(); | |
| 521 | |
| 522 for (int32_t s = 0; s < stringCount; s += 1) { | |
| 523 const UnicodeString *string = strings->get(s); | |
| 524 UErrorCode status = U_ZERO_ERROR; | |
| 525 const CEList *ceList2 = new CEList(coll, *string, status); | |
| 526 | |
| 527 if (U_FAILURE(status)) { | |
| 528 delete ceList2; | |
| 529 ceList2 = NULL; | |
| 530 } | |
| 531 | |
| 532 if (ceList->matchesAt(offset, ceList2)) { | |
| 533 U_ASSERT(ceList2 != NULL); | |
| 534 int32_t clength = ceList2->size(); | |
| 535 int32_t slength = string->length(); | |
| 536 int32_t roffset = offset + clength; | |
| 537 int32_t rlength = 0; | |
| 538 | |
| 539 if (roffset < maxOffset) { | |
| 540 rlength = minLengthInChars(ceList, roffset, history); | |
| 541 | |
| 542 if (rlength <= 0) { | |
| 543 // delete before continue to avoid memory leak. | |
| 544 delete ceList2; | |
| 545 | |
| 546 // ignore any dead ends | |
| 547 continue; | |
| 548 } | |
| 549 } | |
| 550 | |
| 551 if (shortestLength > slength + rlength) { | |
| 552 shortestLength = slength + rlength; | |
| 553 } | |
| 554 } | |
| 555 | |
| 556 delete ceList2; | |
| 557 } | |
| 558 } | |
| 559 | |
| 560 if (shortestLength == INT32_MAX) { | |
| 561 // No matching strings at this offset. See if | |
| 562 // the CE is in a range we can handle manually. | |
| 563 if (ce >= minHan && ce < maxHan) { | |
| 564 // all han have implicit orders which | |
| 565 // generate two CEs. | |
| 566 int32_t roffset = offset + 2; | |
| 567 int32_t rlength = 0; | |
| 568 | |
| 569 //history[roffset++] = -1; | |
| 570 //history[roffset++] = 1; | |
| 571 | |
| 572 if (roffset < maxOffset) { | |
| 573 rlength = minLengthInChars(ceList, roffset, history); | |
| 574 } | |
| 575 | |
| 576 if (rlength < 0) { | |
| 577 return -1; | |
| 578 } | |
| 579 | |
| 580 shortestLength = 1 + rlength; | |
| 581 goto have_shortest; | |
| 582 } else if (ce >= jamoLimits[0] && ce < jamoLimits[3]) { | |
| 583 int32_t roffset = offset; | |
| 584 int32_t rlength = 0; | |
| 585 | |
| 586 // **** this loop may not handle archaic Hangul correctly **** | |
| 587 for (int32_t j = 0; roffset < maxOffset && j < 4; j += 1, roffset +=
1) { | |
| 588 uint32_t jce = ceList->get(roffset); | |
| 589 | |
| 590 // Some Jamo have 24-bit primary order; skip the | |
| 591 // 2nd CE. This should always be OK because if | |
| 592 // we're still in the loop all we've seen are | |
| 593 // a series of Jamo in LVT order. | |
| 594 if (isContinuation(jce)) { | |
| 595 continue; | |
| 596 } | |
| 597 | |
| 598 if (j >= 3 || jce < jamoLimits[j] || jce >= jamoLimits[j + 1]) { | |
| 599 break; | |
| 600 } | |
| 601 } | |
| 602 | |
| 603 if (roffset == offset) { | |
| 604 // we started with a non-L Jamo... | |
| 605 // just say it comes from a single character | |
| 606 roffset += 1; | |
| 607 | |
| 608 // See if the single Jamo has a 24-bit order. | |
| 609 if (roffset < maxOffset && isContinuation(ceList->get(roffset)))
{ | |
| 610 roffset += 1; | |
| 611 } | |
| 612 } | |
| 613 | |
| 614 if (roffset < maxOffset) { | |
| 615 rlength = minLengthInChars(ceList, roffset, history); | |
| 616 } | |
| 617 | |
| 618 if (rlength < 0) { | |
| 619 return -1; | |
| 620 } | |
| 621 | |
| 622 shortestLength = 1 + rlength; | |
| 623 goto have_shortest; | |
| 624 } | |
| 625 | |
| 626 // Can't handle it manually either. Just move on. | |
| 627 return -1; | |
| 628 } | |
| 629 | |
| 630 have_shortest: | |
| 631 history[offset] = shortestLength; | |
| 632 | |
| 633 return shortestLength; | |
| 634 } | |
| 635 | |
| 636 int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const | |
| 637 { | |
| 638 int32_t clength = ceList->size(); | |
| 639 int32_t *history = NEW_ARRAY(int32_t, clength); | |
| 640 | |
| 641 for (int32_t i = 0; i < clength; i += 1) { | |
| 642 history[i] = -1; | |
| 643 } | |
| 644 | |
| 645 int32_t minLength = minLengthInChars(ceList, offset, history); | |
| 646 | |
| 647 DELETE_ARRAY(history); | |
| 648 | |
| 649 return minLength; | |
| 650 } | |
| 651 | |
| 652 #endif // #if !UCONFIG_NO_COLLATION | |
| OLD | NEW |