OLD | NEW |
1 /* | 1 /* |
2 ****************************************************************************** | 2 ****************************************************************************** |
3 * Copyright (C) 1996-2012, International Business Machines * | 3 * Copyright (C) 1996-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. * | 4 * Corporation and others. All Rights Reserved. |
5 ****************************************************************************** | 5 ****************************************************************************** |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 | 9 |
10 #if !UCONFIG_NO_COLLATION | 10 #if !UCONFIG_NO_COLLATION |
11 | 11 |
12 #include "unicode/unistr.h" | 12 #include "unicode/unistr.h" |
13 #include "unicode/usearch.h" | 13 #include "unicode/usearch.h" |
14 | 14 |
15 #include "cmemory.h" | 15 #include "cmemory.h" |
16 #include "unicode/coll.h" | 16 #include "unicode/coll.h" |
17 #include "unicode/tblcoll.h" | 17 #include "unicode/tblcoll.h" |
18 #include "unicode/coleitr.h" | 18 #include "unicode/coleitr.h" |
19 #include "unicode/ucoleitr.h" | 19 #include "unicode/ucoleitr.h" |
20 | 20 |
21 #include "unicode/regex.h" // TODO: make conditional on regexp being buil
t. | 21 #include "unicode/regex.h" // TODO: make conditional on regexp being buil
t. |
22 | 22 |
23 #include "unicode/uniset.h" | 23 #include "unicode/uniset.h" |
24 #include "unicode/uset.h" | 24 #include "unicode/uset.h" |
| 25 #include "unicode/usetiter.h" |
25 #include "unicode/ustring.h" | 26 #include "unicode/ustring.h" |
26 #include "hash.h" | 27 #include "hash.h" |
| 28 #include "normalizer2impl.h" |
27 #include "uhash.h" | 29 #include "uhash.h" |
28 #include "ucol_imp.h" | 30 #include "usrchimp.h" |
29 #include "uassert.h" | 31 #include "uassert.h" |
30 | 32 |
31 #include "colldata.h" | 33 #include "colldata.h" |
32 | 34 |
// File-local array helper macros (identical in the OLD and NEW columns of this diff):
//   ARRAY_SIZE(array)           - element count, computed as sizeof(array)/sizeof(array[0]);
//                                 only meaningful when `array` is a true array, not a pointer.
//   NEW_ARRAY(type, count)      - calls uprv_malloc for (count) * sizeof(type) bytes and casts
//                                 the result to `type *`.
//   DELETE_ARRAY(array)         - passes the pointer (cast to void *) to uprv_free.
//   ARRAY_COPY(dst, src, count) - calls uprv_memcpy for (count) * sizeof (src)[0] bytes
//                                 from src to dst.
// NOTE(review): `array` is not parenthesised inside ARRAY_SIZE and the size multiplication in
// NEW_ARRAY is unchecked - presumably fine for the simple arguments used in this file; confirm.
33 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) | 35 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) |
34 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) | 36 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) |
35 #define DELETE_ARRAY(array) uprv_free((void *) (array)) | 37 #define DELETE_ARRAY(array) uprv_free((void *) (array)) |
36 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src),
(count) * sizeof (src)[0]) | 38 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src),
(count) * sizeof (src)[0]) |
37 | 39 |
38 CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) | 40 CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) |
39 : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) | 41 : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) |
40 { | 42 { |
41 UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), stri
ng.length(), &status); | 43 UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), stri
ng.length(), &status); |
42 UCollationStrength strength = ucol_getStrength(coll); | 44 UCollationStrength strength = ucol_getStrength(coll); |
43 UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==
UCOL_SHIFTED; | 45 UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==
UCOL_SHIFTED; |
44 uint32_t variableTop = ucol_getVariableTop(coll, &status); | 46 uint32_t variableTop = ucol_getVariableTop(coll, &status); |
45 uint32_t strengthMask = 0; | 47 uint32_t strengthMask = 0; |
46 int32_t order; | 48 int32_t order; |
47 | 49 |
48 if (U_FAILURE(status)) { | 50 if (U_FAILURE(status)) { |
49 return; | 51 return; |
50 } | 52 } |
51 | 53 |
52 // **** only set flag if string has Han(gul) **** | 54 // **** only set flag if string has Han(gul) **** |
53 ucol_forceHanImplicit(elems, &status); | 55 // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476 |
54 | 56 |
55 switch (strength) | 57 switch (strength) |
56 { | 58 { |
57 default: | 59 default: |
58 strengthMask |= UCOL_TERTIARYORDERMASK; | 60 strengthMask |= UCOL_TERTIARYORDERMASK; |
59 /* fall through */ | 61 /* fall through */ |
60 | 62 |
61 case UCOL_SECONDARY: | 63 case UCOL_SECONDARY: |
62 strengthMask |= UCOL_SECONDARYORDERMASK; | 64 strengthMask |= UCOL_SECONDARYORDERMASK; |
63 /* fall through */ | 65 /* fall through */ |
(...skipping 339 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
403 bail: | 405 bail: |
404 uset_close(contractions); | 406 uset_close(contractions); |
405 uset_close(expansions); | 407 uset_close(expansions); |
406 uset_close(charsToRemove); | 408 uset_close(charsToRemove); |
407 uset_close(charsToTest); | 409 uset_close(charsToTest); |
408 | 410 |
409 if (U_FAILURE(status)) { | 411 if (U_FAILURE(status)) { |
410 return; | 412 return; |
411 } | 413 } |
412 | 414 |
413 UChar32 hanRanges[] = {UCOL_FIRST_HAN, UCOL_LAST_HAN, UCOL_FIRST_HAN_COMPAT
, UCOL_LAST_HAN_COMPAT, UCOL_FIRST_HAN_A, UCOL_LAST_HAN_A, | 415 UnicodeSet hanRanges(UNICODE_STRING_SIMPLE("[:Unified_Ideograph:]"), status)
; |
414 UCOL_FIRST_HAN_B, UCOL_LAST_HAN_B}; | 416 if (U_FAILURE(status)) { |
415 UChar jamoRanges[] = {UCOL_FIRST_L_JAMO, UCOL_FIRST_V_JAMO, UCOL_FIRST_T_J
AMO, UCOL_LAST_T_JAMO}; | 417 return; |
416 UnicodeString hanString = UnicodeString::fromUTF32(hanRanges, ARRAY_SIZE(ha
nRanges)); | 418 } |
| 419 UnicodeSetIterator hanIter(hanRanges); |
| 420 UnicodeString hanString; |
| 421 while(hanIter.nextRange()) { |
| 422 hanString.append(hanIter.getCodepoint()); |
| 423 hanString.append(hanIter.getCodepointEnd()); |
| 424 } |
| 425 // TODO: Why U+11FF? The old code had an outdated UCOL_LAST_T_JAMO=0x11F9, |
| 426 // but as of Unicode 6.3 the 11xx block is filled, |
| 427 // and there are also more Jamo T at U+D7CB..U+D7FB. |
| 428 // Maybe use [:HST=T:] and look for the end of the last range? |
| 429 // Maybe use script boundary mappings instead of this code?? |
| 430 UChar jamoRanges[] = {Hangul::JAMO_L_BASE, Hangul::JAMO_V_BASE, Hangul::JAM
O_T_BASE + 1, 0x11FF}; |
417 UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges)); | 431 UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges)); |
418 CEList hanList(coll, hanString, status); | 432 CEList hanList(coll, hanString, status); |
419 CEList jamoList(coll, jamoString, status); | 433 CEList jamoList(coll, jamoString, status); |
420 int32_t j = 0; | 434 int32_t j = 0; |
421 | 435 |
422 if (U_FAILURE(status)) { | 436 if (U_FAILURE(status)) { |
423 return; | 437 return; |
424 } | 438 } |
425 | 439 |
426 for (int32_t c = 0; c < jamoList.size(); c += 1) { | 440 for (int32_t c = 0; c < jamoList.size(); c += 1) { |
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
629 } | 643 } |
630 | 644 |
631 int32_t minLength = minLengthInChars(ceList, offset, history); | 645 int32_t minLength = minLengthInChars(ceList, offset, history); |
632 | 646 |
633 DELETE_ARRAY(history); | 647 DELETE_ARRAY(history); |
634 | 648 |
635 return minLength; | 649 return minLength; |
636 } | 650 } |
637 | 651 |
638 #endif // #if !UCONFIG_NO_COLLATION | 652 #endif // #if !UCONFIG_NO_COLLATION |
OLD | NEW |