| OLD | NEW |
| 1 /* | 1 /* |
| 2 ****************************************************************************** | 2 ****************************************************************************** |
| 3 * Copyright (C) 1996-2012, International Business Machines * | 3 * Copyright (C) 1996-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. * | 4 * Corporation and others. All Rights Reserved. |
| 5 ****************************************************************************** | 5 ****************************************************************************** |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
| 9 | 9 |
| 10 #if !UCONFIG_NO_COLLATION | 10 #if !UCONFIG_NO_COLLATION |
| 11 | 11 |
| 12 #include "unicode/unistr.h" | 12 #include "unicode/unistr.h" |
| 13 #include "unicode/usearch.h" | 13 #include "unicode/usearch.h" |
| 14 | 14 |
| 15 #include "cmemory.h" | 15 #include "cmemory.h" |
| 16 #include "unicode/coll.h" | 16 #include "unicode/coll.h" |
| 17 #include "unicode/tblcoll.h" | 17 #include "unicode/tblcoll.h" |
| 18 #include "unicode/coleitr.h" | 18 #include "unicode/coleitr.h" |
| 19 #include "unicode/ucoleitr.h" | 19 #include "unicode/ucoleitr.h" |
| 20 | 20 |
| 21 #include "unicode/regex.h" // TODO: make conditional on regexp being built. | 21 #include "unicode/regex.h" // TODO: make conditional on regexp being built. |
| 22 | 22 |
| 23 #include "unicode/uniset.h" | 23 #include "unicode/uniset.h" |
| 24 #include "unicode/uset.h" | 24 #include "unicode/uset.h" |
| 25 #include "unicode/usetiter.h" |
| 25 #include "unicode/ustring.h" | 26 #include "unicode/ustring.h" |
| 26 #include "hash.h" | 27 #include "hash.h" |
| 28 #include "normalizer2impl.h" |
| 27 #include "uhash.h" | 29 #include "uhash.h" |
| 28 #include "ucol_imp.h" | 30 #include "usrchimp.h" |
| 29 #include "uassert.h" | 31 #include "uassert.h" |
| 30 | 32 |
| 31 #include "colldata.h" | 33 #include "colldata.h" |
| 32 | 34 |
| 33 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) | 35 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) |
| 34 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) | 36 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) |
| 35 #define DELETE_ARRAY(array) uprv_free((void *) (array)) | 37 #define DELETE_ARRAY(array) uprv_free((void *) (array)) |
| 36 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) | 38 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) |
| 37 | 39 |
| 38 CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) | 40 CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) |
| 39 : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) | 41 : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) |
| 40 { | 42 { |
| 41 UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); | 43 UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); |
| 42 UCollationStrength strength = ucol_getStrength(coll); | 44 UCollationStrength strength = ucol_getStrength(coll); |
| 43 UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; | 45 UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; |
| 44 uint32_t variableTop = ucol_getVariableTop(coll, &status); | 46 uint32_t variableTop = ucol_getVariableTop(coll, &status); |
| 45 uint32_t strengthMask = 0; | 47 uint32_t strengthMask = 0; |
| 46 int32_t order; | 48 int32_t order; |
| 47 | 49 |
| 48 if (U_FAILURE(status)) { | 50 if (U_FAILURE(status)) { |
| 49 return; | 51 return; |
| 50 } | 52 } |
| 51 | 53 |
| 52 // **** only set flag if string has Han(gul) **** | 54 // **** only set flag if string has Han(gul) **** |
| 53 ucol_forceHanImplicit(elems, &status); | 55 // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476 |
| 54 | 56 |
| 55 switch (strength) | 57 switch (strength) |
| 56 { | 58 { |
| 57 default: | 59 default: |
| 58 strengthMask |= UCOL_TERTIARYORDERMASK; | 60 strengthMask |= UCOL_TERTIARYORDERMASK; |
| 59 /* fall through */ | 61 /* fall through */ |
| 60 | 62 |
| 61 case UCOL_SECONDARY: | 63 case UCOL_SECONDARY: |
| 62 strengthMask |= UCOL_SECONDARYORDERMASK; | 64 strengthMask |= UCOL_SECONDARYORDERMASK; |
| 63 /* fall through */ | 65 /* fall through */ |
| (...skipping 339 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 403 bail: | 405 bail: |
| 404 uset_close(contractions); | 406 uset_close(contractions); |
| 405 uset_close(expansions); | 407 uset_close(expansions); |
| 406 uset_close(charsToRemove); | 408 uset_close(charsToRemove); |
| 407 uset_close(charsToTest); | 409 uset_close(charsToTest); |
| 408 | 410 |
| 409 if (U_FAILURE(status)) { | 411 if (U_FAILURE(status)) { |
| 410 return; | 412 return; |
| 411 } | 413 } |
| 412 | 414 |
| 413 UChar32 hanRanges[] = {UCOL_FIRST_HAN, UCOL_LAST_HAN, UCOL_FIRST_HAN_COMPAT, UCOL_LAST_HAN_COMPAT, UCOL_FIRST_HAN_A, UCOL_LAST_HAN_A, | 415 UnicodeSet hanRanges(UNICODE_STRING_SIMPLE("[:Unified_Ideograph:]"), status); |
| 414 UCOL_FIRST_HAN_B, UCOL_LAST_HAN_B}; | 416 if (U_FAILURE(status)) { |
| 415 UChar jamoRanges[] = {UCOL_FIRST_L_JAMO, UCOL_FIRST_V_JAMO, UCOL_FIRST_T_JAMO, UCOL_LAST_T_JAMO}; | 417 return; |
| 416 UnicodeString hanString = UnicodeString::fromUTF32(hanRanges, ARRAY_SIZE(hanRanges)); | 418 } |
| 419 UnicodeSetIterator hanIter(hanRanges); |
| 420 UnicodeString hanString; |
| 421 while(hanIter.nextRange()) { |
| 422 hanString.append(hanIter.getCodepoint()); |
| 423 hanString.append(hanIter.getCodepointEnd()); |
| 424 } |
| 425 // TODO: Why U+11FF? The old code had an outdated UCOL_LAST_T_JAMO=0x11F9, |
| 426 // but as of Unicode 6.3 the 11xx block is filled, |
| 427 // and there are also more Jamo T at U+D7CB..U+D7FB. |
| 428 // Maybe use [:HST=T:] and look for the end of the last range? |
| 429 // Maybe use script boundary mappings instead of this code?? |
| 430 UChar jamoRanges[] = {Hangul::JAMO_L_BASE, Hangul::JAMO_V_BASE, Hangul::JAMO_T_BASE + 1, 0x11FF}; |
| 417 UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges)); | 431 UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges)); |
| 418 CEList hanList(coll, hanString, status); | 432 CEList hanList(coll, hanString, status); |
| 419 CEList jamoList(coll, jamoString, status); | 433 CEList jamoList(coll, jamoString, status); |
| 420 int32_t j = 0; | 434 int32_t j = 0; |
| 421 | 435 |
| 422 if (U_FAILURE(status)) { | 436 if (U_FAILURE(status)) { |
| 423 return; | 437 return; |
| 424 } | 438 } |
| 425 | 439 |
| 426 for (int32_t c = 0; c < jamoList.size(); c += 1) { | 440 for (int32_t c = 0; c < jamoList.size(); c += 1) { |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 629 } | 643 } |
| 630 | 644 |
| 631 int32_t minLength = minLengthInChars(ceList, offset, history); | 645 int32_t minLength = minLengthInChars(ceList, offset, history); |
| 632 | 646 |
| 633 DELETE_ARRAY(history); | 647 DELETE_ARRAY(history); |
| 634 | 648 |
| 635 return minLength; | 649 return minLength; |
| 636 } | 650 } |
| 637 | 651 |
| 638 #endif // #if !UCONFIG_NO_COLLATION | 652 #endif // #if !UCONFIG_NO_COLLATION |
| OLD | NEW |