OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2001-2010 IBM and others. All rights reserved. |
| 4 ********************************************************************** |
| 5 * Date Name Description |
| 6 * 08/13/2001 synwee Creation. |
| 7 ********************************************************************** |
| 8 */ |
| 9 #ifndef USRCHIMP_H |
| 10 #define USRCHIMP_H |
| 11 |
| 12 #include "unicode/utypes.h" |
| 13 |
| 14 #if !UCONFIG_NO_COLLATION |
| 15 |
| 16 #include "unicode/normalizer2.h" |
| 17 #include "unicode/ucol.h" |
| 18 #include "unicode/ucoleitr.h" |
| 19 #include "unicode/ubrk.h" |
| 20 |
// Element count of the fixed-size arrays embedded in the structs below:
// UPattern::CEBuffer, UPattern::PCEBuffer and the canonicalPrefixAccents /
// canonicalSuffixAccents buffers of UStringSearch.
#define INITIAL_ARRAY_SIZE_ 256
// Element count of the UPattern::shift and UPattern::backShift tables.
#define MAX_TABLE_SIZE_ 257
| 23 |
| 24 struct USearch { |
| 25 // required since collation element iterator does not have a getText API |
| 26 const UChar *text; |
| 27 int32_t textLength; // exact length |
| 28 UBool isOverlap; |
| 29 UBool isCanonicalMatch; |
| 30 int16_t elementComparisonType; |
| 31 UBreakIterator *internalBreakIter; //internal character breakiter
ator |
| 32 UBreakIterator *breakIter; |
| 33 // value USEARCH_DONE is the default value |
| 34 // if we are not at the start of the text or the end of the text, |
| 35 // depending on the iteration direction and matchedIndex is USEARCH_DONE |
| 36 // it means that we can't find any more matches in that particular direction |
| 37 int32_t matchedIndex; |
| 38 int32_t matchedLength; |
| 39 UBool isForwardSearching; |
| 40 UBool reset; |
| 41 }; |
| 42 |
| 43 struct UPattern { |
| 44 const UChar *text; |
| 45 int32_t textLength; // exact length |
| 46 // length required for backwards ce comparison |
| 47 int32_t CELength; |
| 48 int32_t *CE; |
| 49 int32_t CEBuffer[INITIAL_ARRAY_SIZE_]; |
| 50 int32_t PCELength; |
| 51 int64_t *PCE; |
| 52 int64_t PCEBuffer[INITIAL_ARRAY_SIZE_]; |
| 53 UBool hasPrefixAccents; |
| 54 UBool hasSuffixAccents; |
| 55 int16_t defaultShiftSize; |
| 56 int16_t shift[MAX_TABLE_SIZE_]; |
| 57 int16_t backShift[MAX_TABLE_SIZE_]; |
| 58 }; |
| 59 |
| 60 struct UStringSearch { |
| 61 struct USearch *search; |
| 62 struct UPattern pattern; |
| 63 const UCollator *collator; |
| 64 const U_NAMESPACE_QUALIFIER Normalizer2 *nfd; |
| 65 // positions within the collation element iterator is used to determine |
| 66 // if we are at the start of the text. |
| 67 UCollationElements *textIter; |
| 68 // utility collation element, used throughout program for temporary |
| 69 // iteration. |
| 70 UCollationElements *utilIter; |
| 71 UBool ownCollator; |
| 72 UCollationStrength strength; |
| 73 uint32_t ceMask; |
| 74 uint32_t variableTop; |
| 75 UBool toShift; |
| 76 UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; |
| 77 UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; |
| 78 }; |
| 79 |
| 80 /** |
| 81 * Exact matches without checking for the ends for extra accents. |
| 82 * The match after the position within the collation element iterator is to be |
| 83 * found. |
| 84 * After a match is found the offset in the collation element iterator will be |
| 85 * shifted to the start of the match. |
| 86 * Implementation note: |
| 87 * For tertiary we can't use the collator->tertiaryMask, that is a |
| 88 * preprocessed mask that takes into account case options. since we are only |
| 89 * concerned with exact matches, we don't need that. |
| 90 * Alternate handling - since only the 16 most significant digits is only used, |
| 91 * we can safely do a compare without masking if the ce is a variable, we mask |
| 92 * and get only the primary values no shifting to quartenary is required since |
| 93 * all primary values less than variabletop will need to be masked off anyway. |
| 94 * If the end character is composite and the pattern ce does not match the text |
| 95 * ce, we skip it until we find a match in the end composite character or when |
| 96 * it has passed the character. This is so that we can match pattern "a" with |
| 97 * the text "\u00e6" |
| 98 * @param strsrch string search data |
| 99 * @param status error status if any |
| 100 * @return TRUE if an exact match is found, FALSE otherwise |
| 101 */ |
| 102 U_CFUNC |
| 103 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status); |
| 104 |
| 105 /** |
| 106 * Canonical matches. |
| 107 * According to the definition, matches found here will include the whole span |
| 108 * of beginning and ending accents if it overlaps that region. |
| 109 * @param strsrch string search data |
| 110 * @param status error status if any |
| 111 * @return TRUE if a canonical match is found, FALSE otherwise |
| 112 */ |
| 113 U_CFUNC |
| 114 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status); |
| 115 |
| 116 /** |
| 117 * Gets the previous match. |
| 118 * Comments follows from handleNextExact |
| 119 * @param strsrch string search data |
| 120 * @param status error status if any |
| 121 * @return True if a exact math is found, FALSE otherwise. |
| 122 */ |
| 123 U_CFUNC |
| 124 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status); |
| 125 |
| 126 /** |
| 127 * Canonical matches. |
| 128 * According to the definition, matches found here will include the whole span |
| 129 * of beginning and ending accents if it overlaps that region. |
| 130 * @param strsrch string search data |
| 131 * @param status error status if any |
| 132 * @return TRUE if a canonical match is found, FALSE otherwise |
| 133 */ |
| 134 U_CFUNC |
| 135 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, |
| 136 UErrorCode *status); |
| 137 |
| 138 #endif /* #if !UCONFIG_NO_COLLATION */ |
| 139 |
| 140 #endif |
OLD | NEW |