| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * Copyright (c) 2001-2011,2015 International Business Machines | |
| 3 * Corporation and others. All Rights Reserved. | |
| 4 ******************************************************************** | |
| 5 * File USRCHDAT.H | |
| 6 * Modification History: | |
| 7 * Name date Description | |
| 8 * synwee July 31 2001 creation | |
| 9 ********************************************************************/ | |
| 10 | |
| 11 | |
| 12 /* | |
| 13 Note: This file is included by other C and C++ files. This file should not be di
rectly compiled. | |
| 14 */ | |
| 15 #ifndef USRCHDAT_C | |
| 16 #define USRCHDAT_C | |
| 17 | |
| 18 #include "unicode/ucol.h" | |
| 19 | |
| 20 #if !UCONFIG_NO_COLLATION | |
| 21 | |
| 22 /* Set to 1 if matches must be on grapheme boundaries */ | |
| 23 #define GRAPHEME_BOUNDARIES 1 | |
| 24 | |
| 25 U_CDECL_BEGIN | |
| 26 struct SearchData { | |
| 27 const char *text; | |
| 28 const char *pattern; | |
| 29 const char *collator; /* currently supported "fr" "es" "de", p
lus NULL/other => "en" */ | |
| 30 UCollationStrength strength; | |
| 31 USearchAttributeValue elemCompare; /* value for the USEARCH_ELEMENT_CO
MPARISON attribute */ | |
| 32 const char *breaker; /* currently supported "wordbreaker" for
EN_WORDBREAKER_, plus NULL/other => EN_CHARACTERBREAKER_ */ | |
| 33 int8_t offset[32]; | |
| 34 uint8_t size[32]; | |
| 35 }; | |
| 36 U_CDECL_END | |
| 37 | |
| 38 typedef struct SearchData SearchData; | |
| 39 | |
| 40 static const char *TESTCOLLATORRULE = "& o,O ; p,P"; | |
| 41 | |
| 42 static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \
\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc"; | |
| 43 | |
| 44 static const SearchData BASIC[] = { | |
| 45 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {-1}, {0}}, | |
| 46 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEM
ENT_COMPARISON, NULL, {13, -1}, | |
| 47 {6}}, | |
| 48 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
| 49 {13, 20, -1}, {6, 6}}, | |
| 50 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
| 51 {6, 20, -1}, {6, 6}}, | |
| 52 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {0, 14, -1}, | |
| 53 {6, 6}}, | |
| 54 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {1, -1}, {1}}, | |
| 55 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {5, -1}, {1}}, | |
| 56 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 57 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
| 58 | |
| 59 #if GRAPHEME_BOUNDARIES | |
| 60 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 61 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 62 #else | |
| 63 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
| 64 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, | |
| 65 #endif | |
| 66 | |
| 67 {"\\u00c9", "e", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, | |
| 68 {"x a\\u0301", "a\\u0301", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {2}}, | |
| 69 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 70 }; | |
| 71 | |
| 72 static const SearchData BREAKITERATOREXACT[] = { | |
| 73 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "characterbreaker", {0, 5, -1}, | |
| 74 {3, 3}}, | |
| 75 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "wordbreaker", {5, -1}, {3}}, | |
| 76 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, | |
| 77 "characterbreaker", {10, 14, -1}, {3, 2}}, | |
| 78 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", | |
| 79 {10, -1}, {3}}, | |
| 80 {"Channel, another channel, more channels, and one last Channel", | |
| 81 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbr
eaker", {0, 54, -1}, {7, 7}}, | |
| 82 /* jitterbug 1745 */ | |
| 83 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, | |
| 84 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, | |
| 85 {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
| 86 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1,
28, 41, -1}, {1, 1, 1}}, | |
| 87 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "
characterbreaker", {0, -1}, {1}}, | |
| 88 #if 0 | |
| 89 /* Problem reported by Dave Bertoni, same as ticket 4279? */ | |
| 90 {"\\u0043\\u004F\\u0302\\u0054\\u00C9", "\\u004F", NULL, UCOL_TERTIARY, USEA
RCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, -1}, {2}}, | |
| 91 #endif | |
| 92 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 93 }; | |
| 94 | |
| 95 #define PECHE_WITH_ACCENTS "un p\\u00E9ch\\u00E9, " \ | |
| 96 "\\u00E7a p\\u00E8che par, " \ | |
| 97 "p\\u00E9cher, " \ | |
| 98 "une p\\u00EAche, " \ | |
| 99 "un p\\u00EAcher, " \ | |
| 100 "j\\u2019ai p\\u00EAch\\u00E9, " \ | |
| 101 "un p\\u00E9cheur, " \ | |
| 102 "\\u201Cp\\u00E9che\\u201D, " \ | |
| 103 "decomp peche\\u0301, " \ | |
| 104 "base peche" | |
| 105 /* in the above, the interesting words and their offsets are: | |
| 106 3 pe<301>che<301> | |
| 107 13 pe<300>che | |
| 108 24 pe<301>cher | |
| 109 36 pe<302>che | |
| 110 46 pe<302>cher | |
| 111 59 pe<302>che<301> | |
| 112 69 pe<301>cheur | |
| 113 79 pe<301>che | |
| 114 94 peche<+301> | |
| 115 107 peche | |
| 116 */ | |
| 117 | |
| 118 static const SearchData STRENGTH[] = { | |
| 119 /*012345678901234567890123456789012345678901234567890123456789*/ | |
| 120 /*00*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
| 121 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}
, {3, 3}}, | |
| 122 /*01*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
| 123 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16
, -1}, {3}}, | |
| 124 /*02*/{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAch
er Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat
toe big Toe", | |
| 125 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NUL
L, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, | |
| 126 /*03*/{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
| 127 {10, 14, -1}, {3, 2}}, | |
| 128 /*04*/{"A channel, another CHANNEL, more Channels, and one last channel...", | |
| 129 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {2, 19, 33, 56, -1}, {7, 7, 7, 7}}, | |
| 130 /*05*/{"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, U
SEARCH_STANDARD_ELEMENT_COMPARISON, | |
| 131 NULL, {0, -1}, {1, 0}}, | |
| 132 /* some tests for modified element comparison, ticket #7093 */ | |
| 133 /*06*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5
, 5, 5, 5, 5, 6, 5}}, | |
| 134 /*07*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5
, 6, 5}}, | |
| 135 /*08*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {107, -1}, {5}}, | |
| 136 /*09*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
| 137 /*10*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5
, 5, 5, 6, 5}}, | |
| 138 /*11*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, | |
| 139 /*12*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, | |
| 140 /*13*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, | |
| 141 /*14*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, | |
| 142 /*15*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}
, | |
| 143 /*16*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, | |
| 144 /*17*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
| 145 /*18*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
| 146 /*19*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5,
5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
| 147 /*20*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5,
5, 5, 5, 6, 5}}, | |
| 148 /*21*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
| 149 /*22*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
| 150 /*23*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5
, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
| 151 /*24*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 6, 5}}, | |
| 152 /* more tests for modified element comparison (with fr), ticket #7093
*/ | |
| 153 /*25*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5
, 5, 5, 5, 5, 6, 5}}, | |
| 154 /*26*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5
, 6, 5}}, | |
| 155 /*27*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {107, -1}, {5}}, | |
| 156 /*28*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
| 157 /*29*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5
, 5, 5, 6, 5}}, | |
| 158 /*30*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, | |
| 159 /*31*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, | |
| 160 /*32*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, | |
| 161 /*33*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, | |
| 162 /*34*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}
, | |
| 163 /*35*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, | |
| 164 /*36*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
| 165 /*37*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
| 166 /*38*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5,
5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
| 167 /*39*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5,
5, 5, 5, 6, 5}}, | |
| 168 /*40*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
| 169 /*41*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
| 170 /*42*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5
, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
| 171 /*43*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 6, 5}}, | |
| 172 | |
| 173 #if 0 | |
| 174 /* Ticket 5382 */ | |
| 175 {"12\\u0171", "\\u0170", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {2}}, | |
| 176 #endif | |
| 177 | |
| 178 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 179 }; | |
| 180 | |
| 181 static const SearchData VARIABLE[] = { | |
| 182 /*012345678901234567890123456789012345678901234567890123456789*/ | |
| 183 {"blackbirds black blackbirds blackbird black-bird", | |
| 184 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, 17, 28, 38, -1}, | |
| 185 {9, 9, 9, 10}}, | |
| 186 /* to see that it doesn't go into an infinite loop if the start of text | |
| 187 is an ignorable character */ | |
| 188 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL
, {-1}, {0}}, | |
| 189 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, | |
| 190 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |
| 191 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
| 192 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
| 193 /* testing tightest match */ | |
| 194 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEAR
CH_STANDARD_ELEMENT_COMPARISON, | |
| 195 NULL, {1, -1}, {3}}, | |
| 196 /*012345678901234567890123456789012345678901234567890123456789 */ | |
| 197 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARC
H_STANDARD_ELEMENT_COMPARISON, | |
| 198 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, | |
| 199 /* totally ignorable text */ | |
| 200 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, | |
| 201 NULL, {-1}, {0}}, | |
| 202 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 203 }; | |
| 204 | |
| 205 static const SearchData NORMEXACT[] = { | |
| 206 {"a\\u0300\\u0325", "a\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, | |
| 207 | |
| 208 #if GRAPHEME_BOUNDARIES | |
| 209 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 210 #else | |
| 211 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
| 212 #endif | |
| 213 | |
| 214 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 215 }; | |
| 216 | |
| 217 static const SearchData NONNORMEXACT[] = { | |
| 218 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 219 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 220 }; | |
| 221 | |
| 222 static const SearchData OVERLAP[] = { | |
| 223 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 2, 4, -1}, | |
| 224 {4, 4, 4}}, | |
| 225 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 226 }; | |
| 227 | |
| 228 static const SearchData NONOVERLAP[] = { | |
| 229 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 4, -1}, {4, 4}}, | |
| 230 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 231 }; | |
| 232 | |
| 233 static const SearchData COLLATOR[] = { | |
| 234 /* english */ | |
| 235 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {3}}, | |
| 236 /* tailored */ | |
| 237 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 4, -1}, {3, 3}}, | |
| 238 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 239 }; | |
| 240 | |
| 241 static const SearchData PATTERN[] = { | |
| 242 {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
| 243 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3
, 3}}, | |
| 244 {"The quick brown fox jumps over the lazy foxes", "fox", NULL, | |
| 245 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, | |
| 246 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 247 }; | |
| 248 | |
| 249 static const SearchData TEXT[] = { | |
| 250 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {4, 15, -1}, | |
| 251 {3, 3}}, | |
| 252 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {16, -1}, | |
| 253 {3}}, | |
| 254 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 255 }; | |
| 256 | |
| 257 static const SearchData COMPOSITEBOUNDARIES[] = { | |
| 258 #if GRAPHEME_BOUNDARIES | |
| 259 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {-1}, {0}}, | |
| 260 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
| 261 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
| 262 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
| 263 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
| 264 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 265 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
| 266 #else | |
| 267 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, | |
| 268 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
| 269 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
| 270 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
| 271 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
| 272 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 273 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, 1, -1}, | |
| 274 {1, 1}}, | |
| 275 #endif | |
| 276 | |
| 277 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 278 /* A + 030A + 0301 */ | |
| 279 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 280 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {1}}, | |
| 281 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 282 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 283 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 284 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 285 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 286 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 287 | |
| 288 #if GRAPHEME_BOUNDARIES | |
| 289 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 290 #else | |
| 291 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
| 292 #endif | |
| 293 | |
| 294 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 295 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 296 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 297 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 298 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 299 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
| 300 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 301 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 302 | |
| 303 /* Ticket 5024 */ | |
| 304 {"a\\u00e1", "a\\u00e1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {0, -1}, {2}}, | |
| 305 | |
| 306 /* Ticket 5420 */ | |
| 307 {"fu\\u00dfball", "fu\\u00df", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {3}}, | |
| 308 {"fu\\u00dfball", "fuss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {3}}, | |
| 309 {"fu\\u00dfball", "uss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {1, -1}, {2}}, | |
| 310 | |
| 311 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 312 }; | |
| 313 | |
| 314 static const SearchData MATCH[] = { | |
| 315 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, | |
| 316 {7, 26, -1}, {3, 3}}, | |
| 317 /* 012345678901234567890123456789012345678901234567890 */ | |
| 318 {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
| 319 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {
3, 3, 3}}, | |
| 320 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 321 }; | |
| 322 | |
| 323 static const SearchData SUPPLEMENTARY[] = { | |
| 324 /* 0123456789012345678901234567890123456789012345678900123456789012345678901
23456789012345678901234567890012345678901234567890123456789 */ | |
| 325 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD
800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
| 326 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {4, 13, 22, 26, 29, -1}, | |
| 327 {2, 2, 2, 2, 2}}, | |
| 328 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, | |
| 329 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, | |
| 330 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, | |
| 331 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 332 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, | |
| 333 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 334 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, | |
| 335 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 336 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, | |
| 337 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 338 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 339 }; | |
| 340 | |
| 341 static const char *CONTRACTIONRULE = | |
| 342 "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315"; | |
| 343 | |
| 344 static const SearchData CONTRACTION[] = { | |
| 345 /* common discontiguous */ | |
| 346 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
| 347 | |
| 348 #if GRAPHEME_BOUNDARIES | |
| 349 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 350 #else | |
| 351 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
| 352 #endif | |
| 353 | |
| 354 /* contraction prefix */ | |
| 355 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, NULL, {-1}, {0}}, | |
| 356 | |
| 357 #if GRAPHEME_BOUNDARIES | |
| 358 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {-1}, {0}}, | |
| 359 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {-1}, {0}}, | |
| 360 #else | |
| 361 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, -1}, {2}}, | |
| 362 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {1}}, | |
| 363 #endif | |
| 364 | |
| 365 /* discontiguous problem here for backwards iteration. | |
| 366 accents not found because discontiguous stores all information */ | |
| 367 {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, | |
| 368 {0}}, | |
| 369 /* does not end with a contraction character */ | |
| 370 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {-1}, | |
| 371 {0}}, | |
| 372 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, | |
| 373 {0, -1}, {3}}, | |
| 374 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {-1}, | |
| 375 {0}}, | |
| 376 /* blocked discontiguous */ | |
| 377 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, | |
| 378 {-1}, {0}}, | |
| 379 | |
| 380 #if GRAPHEME_BOUNDARIES | |
| 381 /* | |
| 382 * "ab" generates a contraction that's an expansion. The "z" matches the | |
| 383 * first CE of the expansion but the match fails because it ends in the | |
| 384 * middle of an expansion... | |
| 385 */ | |
| 386 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}}, | |
| 387 #else | |
| 388 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, -1}, {2}}, | |
| 389 #endif | |
| 390 | |
| 391 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 392 }; | |
| 393 | |
| 394 static const char *IGNORABLERULE = "&a = \\u0300"; | |
| 395 | |
| 396 static const SearchData IGNORABLE[] = { | |
| 397 #if GRAPHEME_BOUNDARIES | |
| 398 /* | |
| 399 * This isn't much of a test when matches have to be on | |
| 400 * grapheme boundaries. The match at 0 only works because | |
| 401 * it's at the start of the text. | |
| 402 */ | |
| 403 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, | |
| 404 {0, -1}, {2}}, | |
| 405 #else | |
| 406 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, | |
| 407 {0, 3, -1}, {2, 2}}, | |
| 408 #endif | |
| 409 | |
| 410 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 411 }; | |
| 412 | |
| 413 static const SearchData BASICCANONICAL[] = { | |
| 414 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {-1}, {0}}, | |
| 415 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEM
ENT_COMPARISON, NULL, {13, -1}, | |
| 416 {6}}, | |
| 417 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
| 418 {13, 20, -1}, {6, 6}}, | |
| 419 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
| 420 {6, 20, -1}, {6, 6}}, | |
| 421 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {0, 14, -1}, | |
| 422 {6, 6}}, | |
| 423 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {1, -1}, {1}}, | |
| 424 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {5, -1}, {1}}, | |
| 425 | |
| 426 #if GRAPHEME_BOUNDARIES | |
| 427 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 428 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
| 429 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 430 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 431 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEME
NT_COMPARISON, NULL, {-1}, {0}}, | |
| 432 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
| 433 NULL, {-1}, {0}}, | |
| 434 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
| 435 NULL, {-1}, {0}}, | |
| 436 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", | |
| 437 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
| 438 #else | |
| 439 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
| 440 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
| 441 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, | |
| 442 {2}}, | |
| 443 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, | |
| 444 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEME
NT_COMPARISON, NULL, {1, -1}, {3}}, | |
| 445 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
| 446 NULL, {0, -1}, {5}}, | |
| 447 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
| 448 NULL, {0, -1}, {5}}, | |
| 449 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", | |
| 450 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, 12, -1}, {5, 3}}, | |
| 451 #endif | |
| 452 | |
| 453 {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
| 454 {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
| 455 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 456 }; | |
| 457 | |
| 458 | |
| 459 static const SearchData NORMCANONICAL[] = { | |
| 460 #if GRAPHEME_BOUNDARIES | |
| 461 /* | |
| 462 * These tests don't really mean anything. With matches restricted to graphe
me | |
| 463 * boundaries, isCanonicalMatch doesn't mean anything unless normalization i
s | |
| 464 * also turned on... | |
| 465 */ | |
| 466 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 467 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 468 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 469 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 470 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
| 471 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
| 472 #else | |
| 473 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
| 474 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
| 475 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, | |
| 476 {2}}, | |
| 477 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, | |
| 478 {2}}, | |
| 479 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
| 480 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
| 481 #endif | |
| 482 | |
| 483 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 484 }; | |
| 485 | |
| 486 static const SearchData BREAKITERATORCANONICAL[] = { | |
| 487 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "characterbreaker", {0, 5, -1}, | |
| 488 {3, 3}}, | |
| 489 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "wordbreaker", {5, -1}, {3}}, | |
| 490 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, | |
| 491 "characterbreaker", {10, 14, -1}, {3, 2}}, | |
| 492 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", | |
| 493 {10, -1}, {3}}, | |
| 494 {"Channel, another channel, more channels, and one last Channel", | |
| 495 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbr
eaker", {0, 54, -1}, {7, 7}}, | |
| 496 /* jitterbug 1745 */ | |
| 497 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, | |
| 498 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, | |
| 499 {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
| 500 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1,
28, 41, -1}, {1, 1, 1}}, | |
| 501 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "
characterbreaker", {0, -1}, {1}}, | |
| 502 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 503 }; | |
| 504 | |
| 505 static const SearchData STRENGTHCANONICAL[] = { | |
| 506 /*012345678901234567890123456789012345678901234567890123456789 */ | |
| 507 {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
| 508 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, | |
| 509 {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
| 510 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1},
{3}}, | |
| 511 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod
T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe b
ig Toe", | |
| 512 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15,
21, 27, 34, -1}, {5, 5, 5, 5}}, | |
| 513 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, | |
| 514 {10, 14, -1}, {3, 2}}, | |
| 515 {"A channel, another CHANNEL, more Channels, and one last channel...", | |
| 516 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2
, 19, 33, 56, -1}, | |
| 517 {7, 7, 7, 7}}, | |
| 518 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 519 }; | |
| 520 | |
| 521 static const SearchData VARIABLECANONICAL[] = { | |
| 522 /*012345678901234567890123456789012345678901234567890123456789 */ | |
| 523 {"blackbirds black blackbirds blackbird black-bird", | |
| 524 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, 17, 28, 38, -1}, | |
| 525 {9, 9, 9, 10}}, | |
| 526 /* to see that it doesn't go into an infinite loop if the start of text | |
| 527 is a ignorable character */ | |
| 528 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL
, {-1}, {0}}, | |
| 529 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, | |
| 530 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |
| 531 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
| 532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
| 533 /* testing tightest match */ | |
| 534 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEAR
CH_STANDARD_ELEMENT_COMPARISON, | |
| 535 NULL, {1, -1}, {3}}, | |
| 536 /*012345678901234567890123456789012345678901234567890123456789 */ | |
| 537 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARC
H_STANDARD_ELEMENT_COMPARISON, | |
| 538 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, | |
| 539 /* totally ignorable text */ | |
| 540 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, | |
| 541 NULL, {-1}, {0}}, | |
| 542 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 543 }; | |
| 544 | |
| 545 static const SearchData OVERLAPCANONICAL[] = { | |
| 546 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 2, 4, -1}, | |
| 547 {4, 4, 4}}, | |
| 548 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 549 }; | |
| 550 | |
| 551 static const SearchData NONOVERLAPCANONICAL[] = { | |
| 552 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 4, -1}, {4, 4}}, | |
| 553 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 554 }; | |
| 555 | |
| 556 static const SearchData COLLATORCANONICAL[] = { | |
| 557 /* english */ | |
| 558 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {3}}, | |
| 559 /* tailored */ | |
| 560 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 4, -1}, {3, 3}}, | |
| 561 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 562 }; | |
| 563 | |
| 564 static const SearchData PATTERNCANONICAL[] = { | |
| 565 {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
| 566 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3
, 3}}, | |
| 567 {"The quick brown fox jumps over the lazy foxes", "fox", NULL, | |
| 568 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, | |
| 569 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 570 }; | |
| 571 | |
| 572 static const SearchData TEXTCANONICAL[] = { | |
| 573 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {4, 15, -1}, | |
| 574 {3, 3}}, | |
| 575 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {16, -1}, | |
| 576 {3}}, | |
| 577 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 578 }; | |
| 579 | |
| 580 static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = { | |
| 581 #if GRAPHEME_BOUNDARIES | |
| 582 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {-1}, {0}}, | |
| 583 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
| 584 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
| 585 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
| 586 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
| 587 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 588 | |
| 589 /* first one matches only because it's at the start of the text */ | |
| 590 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
| 591 | |
| 592 /* \\u0300 blocked by \\u0300 */ | |
| 593 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 594 #else | |
| 595 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, | |
| 596 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
| 597 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
| 598 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
| 599 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
| 600 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 601 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, 1, -1}, | |
| 602 {1, 1}}, | |
| 603 /* \\u0300 blocked by \\u0300 */ | |
| 604 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
| 605 #endif | |
| 606 | |
| 607 /* A + 030A + 0301 */ | |
| 608 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 609 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {1}}, | |
| 610 | |
| 611 #if GRAPHEME_BOUNDARIES | |
| 612 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 613 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 614 #else | |
| 615 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 616 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {0, -1}, {1}}, | |
| 617 #endif | |
| 618 | |
| 619 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 620 | |
| 621 #if GRAPHEME_BOUNDARIES | |
| 622 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 623 #else | |
| 624 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 625 #endif | |
| 626 | |
| 627 /* blocked accent */ | |
| 628 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 629 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
| 630 | |
| 631 #if GRAPHEME_BOUNDARIES | |
| 632 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
| 633 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 634 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 635 #else | |
| 636 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
| 637 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, | |
| 638 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, | |
| 639 #endif | |
| 640 | |
| 641 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 642 | |
| 643 #if GRAPHEME_BOUNDARIES | |
| 644 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 645 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
| 646 #else | |
| 647 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 648 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
| 649 #endif | |
| 650 | |
| 651 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
| 652 | |
| 653 #if GRAPHEME_BOUNDARIES | |
| 654 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 655 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
| 656 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", | |
| 657 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {10, -1},
{2}}, | |
| 658 #else | |
| 659 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, | |
| 660 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, | |
| 661 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", | |
| 662 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 6, 10, 1
3, -1}, {1, 3, 2, 1}}, | |
| 663 #endif | |
| 664 | |
| 665 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 666 }; | |
| 667 | |
| 668 static const SearchData MATCHCANONICAL[] = { | |
| 669 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, | |
| 670 {7, 26, -1}, {3, 3}}, | |
| 671 /*012345678901234567890123456789012345678901234567890 */ | |
| 672 {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
| 673 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {
3, 3, 3}}, | |
| 674 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 675 }; | |
| 676 | |
| 677 static const SearchData SUPPLEMENTARYCANONICAL[] = { | |
| 678 /*01234567890123456789012345678901234567890123456789001234567890123456789012
3456789012345678901234567890012345678901234567890123456789 */ | |
| 679 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD
800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
| 680 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {4, 13, 22, 26, 29, -1}, | |
| 681 {2, 2, 2, 2, 2}}, | |
| 682 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, | |
| 683 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, | |
| 684 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, | |
| 685 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 686 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, | |
| 687 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 688 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, | |
| 689 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 690 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, | |
| 691 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
| 692 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 693 }; | |
| 694 | |
| 695 static const SearchData CONTRACTIONCANONICAL[] = { | |
| 696 /* common discontiguous */ | |
| 697 #if GRAPHEME_BOUNDARIES | |
| 698 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
| 699 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 700 #else | |
| 701 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
| 702 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
| 703 #endif | |
| 704 | |
| 705 /* contraction prefix */ | |
| 706 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, NULL, {-1}, {0}}, | |
| 707 | |
| 708 #if GRAPHEME_BOUNDARIES | |
| 709 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {-1}, {0}}, | |
| 710 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {-1}, {0}}, | |
| 711 #else | |
| 712 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, -1}, {2}}, | |
| 713 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {1}}, | |
| 714 #endif | |
| 715 | |
| 716 /* discontiguous problem here for backwards iteration. | |
| 717 forwards gives 0, 4 but backwards give 1, 3 */ | |
| 718 /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, {0, -1}, | |
| 719 {4}}, */ | |
| 720 | |
| 721 /* ends not with a contraction character */ | |
| 722 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 723 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, | |
| 724 | |
| 725 #if GRAPHEME_BOUNDARIES | |
| 726 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 727 | |
| 728 /* blocked discontiguous */ | |
| 729 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
| 730 | |
| 731 /* | |
| 732 * "ab" generates a contraction that's an expansion. The "z" matches the | |
| 733 * first CE of the expansion but the match fails because it ends in the | |
| 734 * middle of an expansion... | |
| 735 */ | |
| 736 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {2}}, | |
| 737 #else | |
| 738 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {0, -1}, {4}}, | |
| 739 | |
| 740 /* blocked discontiguous */ | |
| 741 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {4}}, | |
| 742 | |
| 743 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, -1}, {2}}, | |
| 744 #endif | |
| 745 | |
| 746 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 747 }; | |
| 748 | |
| 749 static const SearchData DIACRITICMATCH[] = { | |
| 750 {"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1
\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 5,-
1}, {4, 3}}, | |
| 751 {"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
| 752 {"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u0
0C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020
", | |
| 753 "\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3,
2, 1, 3, 2}}, | |
| 754 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 755 }; | |
| 756 | |
| 757 static const SearchData INDICPREFIXMATCH[] = { | |
| 758 {"\\u0915\\u0020\\u0915\\u0901\\u0020\\u0915\\u0902\\u0020\\u0915\\u0903\\u0
020\\u0915\\u0940\\u0020\\u0915\\u093F\\u0020\\u0915\\u0943\\u0020\\u0915\\u093C
\\u0020\\u0958", | |
| 759 "\\u0915", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, 2, 5, 8, 11, 14, 17, 20, 23,-1}, {1, 2, 2, 2, 1, 1, 1, 2, 1}}, | |
| 760 {"\\u0915\\u0924\\u0020\\u0915\\u0924\\u0940\\u0020\\u0915\\u0924\\u093F\\u0
020\\u0915\\u0924\\u0947\\u0020\\u0915\\u0943\\u0924\\u0020\\u0915\\u0943\\u0924
\\u0947", | |
| 761 "\\u0915\\u0924", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, 3, 7, 11, -1}, {2, 2, 2, 2}}, | |
| 762 {"\\u0915\\u0924\\u0020\\u0915\\u0924\\u0940\\u0020\\u0915\\u0924\\u093F\\u0
020\\u0915\\u0924\\u0947\\u0020\\u0915\\u0943\\u0924\\u0020\\u0915\\u0943\\u0924
\\u0947", | |
| 763 "\\u0915\\u0943\\u0924", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {15, 19, -1}, {3, 3}}, | |
| 764 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
| 765 }; | |
| 766 | |
| 767 #endif /* #if !UCONFIG_NO_COLLATION */ | |
| 768 | |
| 769 #endif | |
| OLD | NEW |