OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * Copyright (c) 2001-2008,2010 International Business Machines |
| 3 * Corporation and others. All Rights Reserved. |
| 4 ******************************************************************** |
| 5 * File USRCHDAT.H |
| 6 * Modification History: |
| 7 * Name date Description |
| 8 * synwee July 31 2001 creation |
| 9 ********************************************************************/ |
| 10 |
| 11 |
| 12 /* |
| 13 Note: This file is included by other C and C++ files. This file should not be di
rectly compiled. |
| 14 */ |
| 15 #ifndef USRCHDAT_C |
| 16 #define USRCHDAT_C |
| 17 |
| 18 #include "unicode/ucol.h" |
| 19 |
| 20 #if !UCONFIG_NO_COLLATION |
| 21 |
| 22 /* Set to 1 if matches must be on grapheme boundaries */ |
| 23 #define GRAPHEME_BOUNDARIES 1 |
| 24 |
| 25 U_CDECL_BEGIN |
| 26 struct SearchData { /* One row of expected results: what to search, how to search it, and where matches are expected. */ |
| 27 const char *text; /* string that is searched; literals in the tables carry backslash-u escapes (NOTE(review): presumably unescaped by the test driver - confirm) */ |
| 28 const char *pattern; /* string to look for in text; NULL in the terminating row of every table */ |
| 29 const char *collator; /* currently supported "fr" "es" "de", p
lus NULL/other => "en" */ |
| 30 UCollationStrength strength; /* collation strength for this row (UCOL_PRIMARY .. UCOL_IDENTICAL appear in the tables) */ |
| 31 USearchAttributeValue elemCompare; /* value for the USEARCH_ELEMENT_CO
MPARISON attribute */ |
| 32 const char *breaker; /* currently supported "wordbreaker" for
EN_WORDBREAKER_, plus NULL/other => EN_CHARACTERBREAKER_ */ |
| 33 int8_t offset[32]; /* expected match start offsets in text; the tables end each list with -1. int8_t limits offsets to 127 and the list to 31 matches plus the -1. */ |
| 34 uint8_t size[32]; /* expected match length for the offset at the same index */ |
| 35 }; |
| 36 U_CDECL_END |
| 37 |
| 38 typedef struct SearchData SearchData; /* lets the tables below use the bare name SearchData */ |
| 39 |
| 40 static const char *TESTCOLLATORRULE = "& o,O ; p,P"; /* collation rule string relating o/O and p/P; NOTE(review): presumably used by the driver for the tailored row of COLLATOR (fox vs fpx) - confirm */ |
| 41 |
| 42 static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \
\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc"; /* rule string pairing ae/oe/ue (both cases) with the corresponding umlaut letters; not referenced in the visible part of this file */ |
| 43 |
| 44 static const SearchData BASIC[] = { /* Baseline cases: plain ASCII substrings, lone combining marks and one primary-strength accent match. The #if/#else rows are the expectations that change with GRAPHEME_BOUNDARIES; the all-NULL row ends the table. */ |
| 45 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {-1}, {0}}, |
| 46 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEM
ENT_COMPARISON, NULL, {13, -1}, |
| 47 {6}}, |
| 48 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, |
| 49 {13, 20, -1}, {6, 6}}, |
| 50 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, |
| 51 {6, 20, -1}, {6, 6}}, |
| 52 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {0, 14, -1}, |
| 53 {6, 6}}, |
| 54 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {1, -1}, {1}}, |
| 55 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {5, -1}, {1}}, |
| 56 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 57 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, |
| 58 |
| 59 #if GRAPHEME_BOUNDARIES |
| 60 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 61 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 62 #else |
| 63 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
| 64 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, |
| 65 #endif |
| 66 |
| 67 {"\\u00c9", "e", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, |
| 68 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 69 }; |
| 70 |
| 71 static const SearchData BREAKITERATOREXACT[] = { /* Searches tagged with a breaker name. Where the same search appears under both characterbreaker and wordbreaker, the wordbreaker row expects fewer matches. The #if 0 row is disabled; the all-NULL row ends the table. */ |
| 72 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "characterbreaker", {0, 5, -1}, |
| 73 {3, 3}}, |
| 74 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "wordbreaker", {5, -1}, {3}}, |
| 75 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, |
| 76 "characterbreaker", {10, 14, -1}, {3, 2}}, |
| 77 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", |
| 78 {10, -1}, {3}}, |
| 79 {"Channel, another channel, more channels, and one last Channel", |
| 80 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbr
eaker", {0, 54, -1}, {7, 7}}, |
| 81 /* jitterbug 1745 */ |
| 82 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, |
| 83 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, |
| 84 {"testing that string ab\\u00e9cd does not match e", "e", NULL, |
| 85 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1,
28, 41, -1}, {1, 1, 1}}, |
| 86 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "
characterbreaker", {0, -1}, {1}}, |
| 87 #if 0 |
| 88 /* Problem reported by Dave Bertoni, same as ticket 4279? */ |
| 89 {"\\u0043\\u004F\\u0302\\u0054\\u00C9", "\\u004F", NULL, UCOL_TERTIARY, USEA
RCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, -1}, {2}}, |
| 90 #endif |
| 91 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 92 }; |
| 93 |
| 94 #define PECHE_WITH_ACCENTS "un p\\u00E9ch\\u00E9, " \ |
| 95 "\\u00E7a p\\u00E8che par, " \ |
| 96 "p\\u00E9cher, " \ |
| 97 "une p\\u00EAche, " \ |
| 98 "un p\\u00EAcher, " \ |
| 99 "j\\u2019ai p\\u00EAch\\u00E9, " \ |
| 100 "un p\\u00E9cheur, " \ |
| 101 "\\u201Cp\\u00E9che\\u201D, " \ |
| 102 "decomp peche\\u0301, " \ |
| 103 "base peche" |
| 104 /* PECHE_WITH_ACCENTS is the shared search text for rows 06-43 of STRENGTH. In the above, the interesting words and their offsets are listed below; <NNN> stands for accent U+0NNN on the preceding letter, and <+301> marks the one accent that the text stores as a separate combining character: |
| 105 3 pe<301>che<301> |
| 106 13 pe<300>che |
| 107 24 pe<301>cher |
| 108 36 pe<302>che |
| 109 46 pe<302>cher |
| 110 59 pe<302>che<301> |
| 111 69 pe<301>cheur |
| 112 79 pe<301>che |
| 113 94 peche<+301> |
| 114 107 peche |
| 115 */ |
| 116 |
| 117 static const SearchData STRENGTH[] = { /* Expectations across collation strengths and element-comparison modes. Rows 06-24 search PECHE_WITH_ACCENTS with collator en; rows 25-43 repeat the same patterns with fr. The numeric prefix comments are row indices; the #if 0 row is disabled and the all-NULL row ends the table. */ |
| 118 /*012345678901234567890123456789012345678901234567890123456789*/ |
| 119 /*00*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", |
| 120 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}
, {3, 3}}, |
| 121 /*01*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", |
| 122 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16
, -1}, {3}}, |
| 123 /*02*/{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAch
er Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat
toe big Toe", |
| 124 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NUL
L, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, |
| 125 /*03*/{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, |
| 126 {10, 14, -1}, {3, 2}}, |
| 127 /*04*/{"A channel, another CHANNEL, more Channels, and one last channel...", |
| 128 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {2, 19, 33, 56, -1}, {7, 7, 7, 7}}, |
| 129 /*05*/{"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, U
SEARCH_STANDARD_ELEMENT_COMPARISON, |
| 130 NULL, {0, -1}, {1, 0}}, |
| 131 /* some tests for modified element comparison, ticket #7093 */ |
| 132 /*06*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5
, 5, 5, 5, 5, 6, 5}}, |
| 133 /*07*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5
, 6, 5}}, |
| 134 /*08*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {107, -1}, {5}}, |
| 135 /*09*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 5, 5, 5, 6, 5}}, |
| 136 /*10*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5
, 5, 5, 6, 5}}, |
| 137 /*11*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, |
| 138 /*12*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, |
| 139 /*13*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, |
| 140 /*14*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, |
| 141 /*15*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}
, |
| 142 /*16*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, |
| 143 /*17*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, |
| 144 /*18*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, |
| 145 /*19*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5,
5, 5, 5, 5, 5, 5, 5, 6, 5}}, |
| 146 /*20*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5,
5, 5, 5, 6, 5}}, |
| 147 /*21*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, |
| 148 /*22*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, |
| 149 /*23*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5
, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, |
| 150 /*24*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 6, 5}}, |
| 151 /* more tests for modified element comparison (with fr), ticket #7093
*/ |
| 152 /*25*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5
, 5, 5, 5, 5, 6, 5}}, |
| 153 /*26*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5
, 6, 5}}, |
| 154 /*27*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {107, -1}, {5}}, |
| 155 /*28*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 5, 5, 5, 6, 5}}, |
| 156 /*29*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5
, 5, 5, 6, 5}}, |
| 157 /*30*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, |
| 158 /*31*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, |
| 159 /*32*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, |
| 160 /*33*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, |
| 161 /*34*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}
, |
| 162 /*35*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, |
| 163 /*36*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, |
| 164 /*37*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, |
| 165 /*38*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5,
5, 5, 5, 5, 5, 5, 5, 6, 5}}, |
| 166 /*39*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5,
5, 5, 5, 6, 5}}, |
| 167 /*40*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, |
| 168 /*41*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, |
| 169 /*42*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5
, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, |
| 170 /*43*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 6, 5}}, |
| 171 |
| 172 #if 0 |
| 173 /* Ticket 5382 */ |
| 174 {"12\\u0171", "\\u0170", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {2}}, |
| 175 #endif |
| 176 |
| 177 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 178 }; |
| 179 |
| 180 static const SearchData VARIABLE[] = { /* Cases whose expected results treat spaces and hyphens as ignorable, e.g. blackbird is expected to match black-bird with length 10. NOTE(review): presumably run with shifted handling of variable characters - confirm in the driver. The all-NULL row ends the table. */ |
| 181 /*012345678901234567890123456789012345678901234567890123456789*/ |
| 182 {"blackbirds black blackbirds blackbird black-bird", |
| 183 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, 17, 28, 38, -1}, |
| 184 {9, 9, 9, 10}}, |
| 185 /* to see that it doesn't go into an infinite loop if the start of text |
| 186 is an ignorable character */ |
| 187 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL
, {-1}, {0}}, |
| 188 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, |
| 189 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, |
| 190 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 191 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, |
| 192 /* testing tightest match */ |
| 193 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEAR
CH_STANDARD_ELEMENT_COMPARISON, |
| 194 NULL, {1, -1}, {3}}, |
| 195 /*012345678901234567890123456789012345678901234567890123456789 */ |
| 196 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARC
H_STANDARD_ELEMENT_COMPARISON, |
| 197 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, |
| 198 /* totally ignorable text */ |
| 199 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, |
| 200 NULL, {-1}, {0}}, |
| 201 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 202 }; |
| 203 |
| 204 static const SearchData NORMEXACT[] = { /* Patterns whose combining marks are in a different order than in the text. The first row expects a whole-string match of length 3; the #if/#else row depends on GRAPHEME_BOUNDARIES. NOTE(review): name suggests normalization is enabled for this table - confirm in the driver. */ |
| 205 {"a\\u0300\\u0325", "a\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, |
| 206 |
| 207 #if GRAPHEME_BOUNDARIES |
| 208 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 209 #else |
| 210 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
| 211 #endif |
| 212 |
| 213 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 214 }; |
| 215 |
| 216 static const SearchData NONNORMEXACT[] = { /* Single case: a reordered combining-mark pattern that is expected not to match, followed by the all-NULL terminator. NOTE(review): name suggests normalization is off for this table - confirm in the driver. */ |
| 217 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 218 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 219 }; |
| 220 |
| 221 static const SearchData OVERLAP[] = { /* abab in abababab with overlapping matches expected: offsets 0, 2 and 4 (compare NONOVERLAP, which expects 0 and 4 for the same search). */ |
| 222 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 2, 4, -1}, |
| 223 {4, 4, 4}}, |
| 224 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 225 }; |
| 226 |
| 227 static const SearchData NONOVERLAP[] = { /* abab in abababab with only non-overlapping matches expected: offsets 0 and 4 (compare OVERLAP, which also expects offset 2). */ |
| 228 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 4, -1}, {4, 4}}, |
| 229 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 230 }; |
| 231 |
| 232 static const SearchData COLLATOR[] = { /* The same search under two collators: the english row finds fox once, the tailored row also expects fpx to match at offset 4. NOTE(review): the tailored collator is presumably built from TESTCOLLATORRULE by the driver - confirm. */ |
| 233 /* english */ |
| 234 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {3}}, |
| 235 /* tailored */ |
| 236 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 4, -1}, {3, 3}}, |
| 237 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 238 }; |
| 239 |
| 240 static const SearchData PATTERN[] = { /* One text searched with two different patterns (the, then fox) at primary strength; the all-NULL row ends the table. */ |
| 241 {"The quick brown fox jumps over the lazy foxes", "the", NULL, |
| 242 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3
, 3}}, |
| 243 {"The quick brown fox jumps over the lazy foxes", "fox", NULL, |
| 244 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, |
| 245 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 246 }; |
| 247 |
| 248 static const SearchData TEXT[] = { /* One pattern (fox) searched in two different texts; the all-NULL row ends the table. */ |
| 249 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {4, 15, -1}, |
| 250 {3, 3}}, |
| 251 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {16, -1}, |
| 252 {3}}, |
| 253 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 254 }; |
| 255 |
| 256 static const SearchData COMPOSITEBOUNDARIES[] = { /* Searches for whole or partial pieces of precomposed characters (U+00C0, U+01FA, U+0F73, U+00DF). Many rows expect no match; the #if/#else rows are the expectations that change with GRAPHEME_BOUNDARIES. The all-NULL row ends the table. */ |
| 257 #if GRAPHEME_BOUNDARIES |
| 258 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {-1}, {0}}, |
| 259 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, |
| 260 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, |
| 261 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, |
| 262 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, |
| 263 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 264 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, |
| 265 #else |
| 266 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, |
| 267 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, |
| 268 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, |
| 269 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, |
| 270 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, |
| 271 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 272 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, 1, -1}, |
| 273 {1, 1}}, |
| 274 #endif |
| 275 |
| 276 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 277 /* A + 030A + 0301 */ |
| 278 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 279 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {1}}, |
| 280 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 281 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 282 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 283 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 284 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 285 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 286 |
| 287 #if GRAPHEME_BOUNDARIES |
| 288 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 289 #else |
| 290 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, |
| 291 #endif |
| 292 |
| 293 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 294 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 295 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 296 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 297 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 298 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, |
| 299 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 300 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 301 |
| 302 /* Ticket 5024 */ |
| 303 {"a\\u00e1", "a\\u00e1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {0, -1}, {2}}, |
| 304 |
| 305 /* Ticket 5420 */ |
| 306 {"fu\\u00dfball", "fu\\u00df", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {3}}, |
| 307 {"fu\\u00dfball", "fuss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {3}}, |
| 308 {"fu\\u00dfball", "uss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {1, -1}, {2}}, |
| 309 |
| 310 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 311 }; |
| 312 |
| 313 static const SearchData MATCH[] = { /* Two texts searched for bee; the second text extends the first and adds a third expected match at offset 40. The all-NULL row ends the table. */ |
| 314 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, |
| 315 {7, 26, -1}, {3, 3}}, |
| 316 /* 012345678901234567890123456789012345678901234567890 */ |
| 317 {"a busy bee is a very busy beeee with no bee life", "bee", NULL, |
| 318 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {
3, 3, 3}}, |
| 319 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 320 }; |
| 321 |
| 322 static const SearchData SUPPLEMENTARY[] = { /* Cases built from surrogate pairs: each pair counts as 2 in offset and size (e.g. the D834 DDB9 pattern matches with length 2). The first text also contains unpaired surrogates next to a valid pair. The all-NULL row ends the table. */ |
| 323 /* 0123456789012345678901234567890123456789012345678900123456789012345678901
23456789012345678901234567890012345678901234567890123456789 */ |
| 324 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD
800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", |
| 325 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {4, 13, 22, 26, 29, -1}, |
| 326 {2, 2, 2, 2, 2}}, |
| 327 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, |
| 328 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, |
| 329 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, |
| 330 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 331 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, |
| 332 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 333 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, |
| 334 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 335 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, |
| 336 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 337 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 338 }; |
| 339 |
| 340 static const char *CONTRACTIONRULE = /* collation rule string defining the multi-character sequences (AB, ABC, X+0300, X+0300+0315) and the ab/c expansion that the CONTRACTION table's comments refer to */ |
| 341 "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315"; |
| 342 |
| 343 static const SearchData CONTRACTION[] = { /* Searches for parts of multi-character sequences. NOTE(review): presumably run with a collator built from CONTRACTIONRULE - confirm in the driver. The #if/#else rows are the expectations that change with GRAPHEME_BOUNDARIES; the all-NULL row ends the table. */ |
| 344 /* common discontiguous */ |
| 345 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, |
| 346 |
| 347 #if GRAPHEME_BOUNDARIES |
| 348 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 349 #else |
| 350 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
| 351 #endif |
| 352 |
| 353 /* contraction prefix */ |
| 354 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, NULL, {-1}, {0}}, |
| 355 |
| 356 #if GRAPHEME_BOUNDARIES |
| 357 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {-1}, {0}}, |
| 358 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {-1}, {0}}, |
| 359 #else |
| 360 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, -1}, {2}}, |
| 361 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {1}}, |
| 362 #endif |
| 363 |
| 364 /* discontiguous problem here for backwards iteration. |
| 365 accents not found because discontiguous stores all information */ |
| 366 {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, |
| 367 {0}}, |
| 368 /* ends not with a contraction character */ |
| 369 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {-1}, |
| 370 {0}}, |
| 371 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, |
| 372 {0, -1}, {3}}, |
| 373 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {-1}, |
| 374 {0}}, |
| 375 /* blocked discontiguous */ |
| 376 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, |
| 377 {-1}, {0}}, |
| 378 |
| 379 #if GRAPHEME_BOUNDARIES |
| 380 /* |
| 381 * "ab" generates a contraction that's an expansion. The "z" matches the |
| 382 * first CE of the expansion but the match fails because it ends in the |
| 383 * middle of an expansion... |
| 384 */ |
| 385 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}}, |
| 386 #else |
| 387 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, -1}, {2}}, |
| 388 #endif |
| 389 |
| 390 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 391 }; |
| 392 |
| 393 static const char *IGNORABLERULE = "&a = \\u0300"; /* collation rule string equating U+0300 with the letter a; NOTE(review): presumably used by the driver for the IGNORABLE table - confirm */ |
| 394 |
| 395 static const SearchData IGNORABLE[] = { /* Single search for U+0300 in a text holding two U+0300 U+0315 pairs; the expected matches have length 2 and their number depends on GRAPHEME_BOUNDARIES. The all-NULL row ends the table. */ |
| 396 #if GRAPHEME_BOUNDARIES |
| 397 /* |
| 398 * This isn't much of a test when matches have to be on |
| 399 * grapheme boundaries. The match at 0 only works because |
| 400 * it's at the start of the text. |
| 401 */ |
| 402 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, |
| 403 {0, -1}, {2}}, |
| 404 #else |
| 405 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, |
| 406 {0, 3, -1}, {2, 2}}, |
| 407 #endif |
| 408 |
| 409 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 410 }; |
| 411 |
| 412 static const SearchData BASICCANONICAL[] = { /* Starts with the same ASCII rows as BASIC, then combining-mark searches whose expectations sit in an #if/#else on GRAPHEME_BOUNDARIES (no match when it is 1), then two reordered-mark rows expected to match. NOTE(review): name suggests these run in canonical-match mode - confirm in the driver. The all-NULL row ends the table. */ |
| 413 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {-1}, {0}}, |
| 414 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEM
ENT_COMPARISON, NULL, {13, -1}, |
| 415 {6}}, |
| 416 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, |
| 417 {13, 20, -1}, {6, 6}}, |
| 418 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, |
| 419 {6, 20, -1}, {6, 6}}, |
| 420 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {0, 14, -1}, |
| 421 {6, 6}}, |
| 422 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {1, -1}, {1}}, |
| 423 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {5, -1}, {1}}, |
| 424 |
| 425 #if GRAPHEME_BOUNDARIES |
| 426 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 427 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, |
| 428 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 429 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 430 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEME
NT_COMPARISON, NULL, {-1}, {0}}, |
| 431 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, |
| 432 NULL, {-1}, {0}}, |
| 433 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, |
| 434 NULL, {-1}, {0}}, |
| 435 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", |
| 436 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, |
| 437 #else |
| 438 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, |
| 439 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, |
| 440 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, |
| 441 {2}}, |
| 442 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, |
| 443 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEME
NT_COMPARISON, NULL, {1, -1}, {3}}, |
| 444 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, |
| 445 NULL, {0, -1}, {5}}, |
| 446 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, |
| 447 NULL, {0, -1}, {5}}, |
| 448 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", |
| 449 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, 12, -1}, {5, 3}}, |
| 450 #endif |
| 451 |
| 452 {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
| 453 {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
| 454 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 455 }; |
| 456 |
| 457 |
| 458 static const SearchData NORMCANONICAL[] = { /* Six combining-mark searches given twice: with GRAPHEME_BOUNDARIES set every row expects no match, otherwise every row expects one match of length 2. The all-NULL row ends the table. */ |
| 459 #if GRAPHEME_BOUNDARIES |
| 460 /* |
| 461 * These tests don't really mean anything. With matches restricted to graphe
me |
| 462 * boundaries, isCanonicalMatch doesn't mean anything unless normalization i
s |
| 463 * also turned on... |
| 464 */ |
| 465 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 466 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 467 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 468 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 469 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, |
| 470 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, |
| 471 #else |
| 472 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, |
| 473 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, |
| 474 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, |
| 475 {2}}, |
| 476 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, |
| 477 {2}}, |
| 478 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, |
| 479 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, |
| 480 #endif |
| 481 |
| 482 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 483 }; |
| 484 |
/*
 * BREAKITERATORCANONICAL: cases that set the breaker column explicitly.
 * Several text/pattern pairs appear twice, once with characterbreaker and
 * once with wordbreaker; the wordbreaker row expects a subset of the
 * characterbreaker offsets (foxy fox: 0 and 5 versus only 5).
 * Each offset list ends with -1 and the size list gives one length per
 * offset. The last row (NULL text and pattern) closes the table.
 */
| 485 static const SearchData BREAKITERATORCANONICAL[] = { |
| 486 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "characterbreaker", {0, 5, -1}, |
| 487 {3, 3}}, |
| 488 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "wordbreaker", {5, -1}, {3}}, |
| 489 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, |
| 490 "characterbreaker", {10, 14, -1}, {3, 2}}, |
| 491 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", |
| 492 {10, -1}, {3}}, |
| 493 {"Channel, another channel, more channels, and one last Channel", |
| 494 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbr
eaker", {0, 54, -1}, {7, 7}}, |
| 495 /* jitterbug 1745 */ |
| 496 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, |
| 497 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, |
| 498 {"testing that string ab\\u00e9cd does not match e", "e", NULL, |
| 499 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1,
28, 41, -1}, {1, 1, 1}}, |
| 500 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "
characterbreaker", {0, -1}, {1}}, |
| 501 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 502 }; |
| 503 |
/*
 * STRENGTHCANONICAL: every data row uses UCOL_PRIMARY strength, with the
 * collators en, fr, de and es.
 * The expected offsets show case and accent differences being ignored:
 * pattern channel is expected at CHANNEL and Channels, and pattern peche is
 * expected at each of the four accented words.
 * The ruler comment on the first line marks column positions for counting
 * offsets by eye. The last row (NULL text and pattern) closes the table.
 */
| 504 static const SearchData STRENGTHCANONICAL[] = { |
| 505 /*012345678901234567890123456789012345678901234567890123456789 */ |
| 506 {"The quick brown fox jumps over the lazy foxes", "fox", "en", |
| 507 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, |
| 508 {"The quick brown fox jumps over the lazy foxes", "fox", "en", |
| 509 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1},
{3}}, |
| 510 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod
T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe b
ig Toe", |
| 511 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15,
21, 27, 34, -1}, {5, 5, 5, 5}}, |
| 512 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, |
| 513 {10, 14, -1}, {3, 2}}, |
| 514 {"A channel, another CHANNEL, more Channels, and one last channel...", |
| 515 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2
, 19, 33, 56, -1}, |
| 516 {7, 7, 7, 7}}, |
| 517 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 518 }; |
| 519 |
/*
 * VARIABLECANONICAL: cases involving spaces and hyphens in text or pattern.
 * Row 1 expects black-bird (size 10) to match the pattern blackbird.
 * The row with the alphabet text and a blank pattern expects a zero-length
 * match at each of the offsets 0 through 25.
 * The two abc rows differ only in strength: UCOL_QUATERNARY expects the
 * single contiguous abc, UCOL_SECONDARY expects five matches of size 3 to 5.
 *
 * NOTE(review): the expected offsets depend on the exact number of spaces
 * inside the string literals. The abc text as it appears here is 24
 * characters long, yet offsets up to 31 are expected, so runs of spaces
 * look to have been collapsed in this copy - verify the literals against
 * the upstream file before relying on them.
 * The last row (NULL text and pattern) closes the table.
 */
| 520 static const SearchData VARIABLECANONICAL[] = { |
| 521 /*012345678901234567890123456789012345678901234567890123456789 */ |
| 522 {"blackbirds black blackbirds blackbird black-bird", |
| 523 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, 17, 28, 38, -1}, |
| 524 {9, 9, 9, 10}}, |
| 525 /* to see that it doesn't go into an infinite loop if the start of text |
| 526 is a ignorable character */ |
| 527 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL
, {-1}, {0}}, |
| 528 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, |
| 529 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, |
| 530 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, |
| 532 /* testing tightest match */ |
| 533 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEAR
CH_STANDARD_ELEMENT_COMPARISON, |
| 534 NULL, {1, -1}, {3}}, |
| 535 /*012345678901234567890123456789012345678901234567890123456789 */ |
| 536 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARC
H_STANDARD_ELEMENT_COMPARISON, |
| 537 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, |
| 538 /* totally ignorable text */ |
| 539 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, |
| 540 NULL, {-1}, {0}}, |
| 541 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 542 }; |
| 543 |
/*
 * OVERLAPCANONICAL: text abababab, pattern abab. The expected offsets 0, 2
 * and 4 include occurrences that overlap one another.
 * Presumably the consumer enables overlapping matches for this table -
 * verify against caller. The last row (NULL text and pattern) closes it.
 */
| 544 static const SearchData OVERLAPCANONICAL[] = { |
| 545 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 2, 4, -1}, |
| 546 {4, 4, 4}}, |
| 547 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 548 }; |
| 549 |
/*
 * NONOVERLAPCANONICAL: same text and pattern as OVERLAPCANONICAL (abababab /
 * abab), but only the non-overlapping occurrences at 0 and 4 are expected.
 * The last row (NULL text and pattern) closes the table.
 */
| 550 static const SearchData NONOVERLAPCANONICAL[] = { |
| 551 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 4, -1}, {4, 4}}, |
| 552 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 553 }; |
| 554 |
/*
 * COLLATORCANONICAL: the same text (fox fpx) and pattern (fox) with two
 * expectations. The first row expects only the literal fox at offset 0;
 * the second, at UCOL_PRIMARY, also expects fpx at offset 4.
 * NOTE(review): both rows leave the collator column NULL, so the tailoring
 * that makes p match o is not selected here - presumably the consumer
 * installs a rule-based collator (compare TESTCOLLATORRULE near the top of
 * the file) before using the second row; confirm against caller.
 * The last row (NULL text and pattern) closes the table.
 */
| 555 static const SearchData COLLATORCANONICAL[] = { |
| 556 /* english */ |
| 557 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {3}}, |
| 558 /* tailored */ |
| 559 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 4, -1}, {3, 3}}, |
| 560 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 561 }; |
| 562 |
/*
 * PATTERNCANONICAL: one text searched with two different patterns (the, then
 * fox) at UCOL_PRIMARY strength. Pattern the is expected at offset 0, where
 * the text has a capital T, as well as at offset 31.
 * Presumably used to check that changing the pattern on an existing search
 * yields the second row's results - verify against caller.
 * The last row (NULL text and pattern) closes the table.
 */
| 563 static const SearchData PATTERNCANONICAL[] = { |
| 564 {"The quick brown fox jumps over the lazy foxes", "the", NULL, |
| 565 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3
, 3}}, |
| 566 {"The quick brown fox jumps over the lazy foxes", "fox", NULL, |
| 567 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, |
| 568 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 569 }; |
| 570 |
/*
 * TEXTCANONICAL: one pattern (fox) searched in two different texts at
 * UCOL_TERTIARY strength; the first text holds two occurrences (4 and 15),
 * the second a single one (16).
 * Presumably used to check that changing the text on an existing search
 * yields the second row's results - verify against caller.
 * The last row (NULL text and pattern) closes the table.
 */
| 571 static const SearchData TEXTCANONICAL[] = { |
| 572 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {4, 15, -1}, |
| 573 {3, 3}}, |
| 574 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {16, -1}, |
| 575 {3}}, |
| 576 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 577 }; |
| 578 |
/*
 * COMPOSITEBOUNDARIESCANONICAL: cases where the pattern is a piece (base
 * letter or combining mark) of a precomposed character in the text; the
 * texts use U+00C0, U+01FA and U+0F73.
 * Rows inside the GRAPHEME_BOUNDARIES conditionals come in pairs with the
 * same text and pattern; only the expected offsets and sizes differ. The
 * GRAPHEME_BOUNDARIES branches mostly expect no match (offset list is just
 * -1), while the corresponding else branches expect a match.
 * Rows outside the conditionals have the same expectation in both builds.
 * The last row (NULL text and pattern) closes the table.
 */
| 579 static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = { |
| 580 #if GRAPHEME_BOUNDARIES |
| 581 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {-1}, {0}}, |
| 582 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, |
| 583 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, |
| 584 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, |
| 585 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, |
| 586 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 587 |
| 588 /* first one matches only because it's at the start of the text */ |
| 589 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, |
| 590 |
| 591 /* \\u0300 blocked by \\u0300 */ |
| 592 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 593 #else |
| 594 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, |
| 595 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, |
| 596 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, |
| 597 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, |
| 598 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, |
| 599 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 600 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, 1, -1}, |
| 601 {1, 1}}, |
| 602 /* \\u0300 blocked by \\u0300 */ |
| 603 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, |
| 604 #endif |
| 605 |
| 606 /* A + 030A + 0301 */ |
| 607 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 608 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {1}}, |
| 609 |
| 610 #if GRAPHEME_BOUNDARIES |
| 611 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 612 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 613 #else |
| 614 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 615 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {0, -1}, {1}}, |
| 616 #endif |
| 617 |
| 618 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 619 |
| 620 #if GRAPHEME_BOUNDARIES |
| 621 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 622 #else |
| 623 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 624 #endif |
| 625 |
| 626 /* blocked accent */ |
| 627 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 628 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, |
| 629 |
| 630 #if GRAPHEME_BOUNDARIES |
| 631 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, |
| 632 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 633 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 634 #else |
| 635 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, |
| 636 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, |
| 637 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, |
| 638 #endif |
| 639 |
| 640 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 641 |
| 642 #if GRAPHEME_BOUNDARIES |
| 643 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 644 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, |
| 645 #else |
| 646 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 647 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, |
| 648 #endif |
| 649 |
| 650 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, |
| 651 |
| 652 #if GRAPHEME_BOUNDARIES |
| 653 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 654 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, |
| 655 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", |
| 656 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {10, -1},
{2}}, |
| 657 #else |
| 658 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, |
| 659 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, |
| 660 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", |
| 661 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 6, 10, 1
3, -1}, {1, 3, 2, 1}}, |
| 662 #endif |
| 663 |
| 664 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 665 }; |
| 666 |
/*
 * MATCHCANONICAL: pattern bee searched in two texts at UCOL_TERTIARY. The
 * second text extends the first and adds one more expected match at
 * offset 40. In both, the word beeee contributes a single match at 26.
 * The ruler comment marks column positions for counting offsets by eye.
 * The last row (NULL text and pattern) closes the table.
 */
| 667 static const SearchData MATCHCANONICAL[] = { |
| 668 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, |
| 669 {7, 26, -1}, {3, 3}}, |
| 670 /*012345678901234567890123456789012345678901234567890 */ |
| 671 {"a busy bee is a very busy beeee with no bee life", "bee", NULL, |
| 672 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {
3, 3, 3}}, |
| 673 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 674 }; |
| 675 |
/*
 * SUPPLEMENTARYCANONICAL: cases whose text and pattern contain surrogate
 * pairs (D800 DC00, D834 DDB9) written as backslash-u escapes.
 * The expected offsets and sizes are consistent with counting each escape
 * as one UTF-16 code unit, so a surrogate pair has size 2; the first text
 * also contains unpaired surrogates next to a pair. Presumably the consumer
 * unescapes the strings before searching - verify against caller.
 * The last row (NULL text and pattern) closes the table.
 */
| 676 static const SearchData SUPPLEMENTARYCANONICAL[] = { |
| 677 /*01234567890123456789012345678901234567890123456789001234567890123456789012
3456789012345678901234567890012345678901234567890123456789 */ |
| 678 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD
800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", |
| 679 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {4, 13, 22, 26, 29, -1}, |
| 680 {2, 2, 2, 2, 2}}, |
| 681 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, |
| 682 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, |
| 683 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, |
| 684 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 685 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, |
| 686 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 687 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, |
| 688 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 689 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, |
| 690 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
| 691 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 692 }; |
| 693 |
/*
 * CONTRACTIONCANONICAL: cases that the inline comments describe as
 * contractions and discontiguous contractions, built from the combining
 * marks U+0300, U+0315, U+0319 and U+031A and the letter pairs AB and ab.
 * Rows inside the GRAPHEME_BOUNDARIES conditionals are paired with else rows
 * that have the same text and pattern but different expected results.
 * One row (text X U+0300 U+0319 U+0315) is commented out because forward and
 * backward iteration disagree, per the comment above it.
 *
 * NOTE(review): every row leaves the collator column NULL, so the
 * contraction rules are not selected by this table - presumably the
 * consumer supplies a rule-based collator; confirm against caller.
 * NOTE(review): in the GRAPHEME_BOUNDARIES branch the row for text ab and
 * pattern z has offset list -1 (no match) but size 2, whereas every other
 * no-match row in this file uses size 0 - confirm whether this is
 * intentional or a leftover from the else-branch row.
 * The last row (NULL text and pattern) closes the table.
 */
| 694 static const SearchData CONTRACTIONCANONICAL[] = { |
| 695 /* common discontiguous */ |
| 696 #if GRAPHEME_BOUNDARIES |
| 697 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, |
| 698 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 699 #else |
| 700 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, |
| 701 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
| 702 #endif |
| 703 |
| 704 /* contraction prefix */ |
| 705 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, NULL, {-1}, {0}}, |
| 706 |
| 707 #if GRAPHEME_BOUNDARIES |
| 708 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {-1}, {0}}, |
| 709 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {-1}, {0}}, |
| 710 #else |
| 711 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, -1}, {2}}, |
| 712 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {1}}, |
| 713 #endif |
| 714 |
| 715 /* discontiguous problem here for backwards iteration. |
| 716 forwards gives 0, 4 but backwards give 1, 3 */ |
| 717 /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, {0, -1}, |
| 718 {4}}, */ |
| 719 |
| 720 /* ends not with a contraction character */ |
| 721 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 722 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, |
| 723 |
| 724 #if GRAPHEME_BOUNDARIES |
| 725 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 726 |
| 727 /* blocked discontiguous */ |
| 728 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
| 729 |
| 730 /* |
| 731 * "ab" generates a contraction that's an expansion. The "z" matches the |
| 732 * first CE of the expansion but the match fails because it ends in the |
| 733 * middle of an expansion... |
| 734 */ |
| 735 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {2}}, |
| 736 #else |
| 737 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {0, -1}, {4}}, |
| 738 |
| 739 /* blocked discontiguous */ |
| 740 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {4}}, |
| 741 |
| 742 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, -1}, {2}}, |
| 743 #endif |
| 744 |
| 745 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} |
| 746 }; |
| 747 |
/*
 * DIACRITICMATCH: three cases at UCOL_PRIMARY or UCOL_SECONDARY strength
 * whose texts mix precomposed characters with base-plus-combining-mark
 * sequences. Matches of differing sizes are expected for the same pattern
 * (sizes 4 and 3 in the first row; 1, 2 and 3 in the third).
 *
 * NOTE(review): the third row names the collator
 * LDE_AN_CX_EX_FX_HX_NX_S1, which is not one of the ids listed in the
 * SearchData field comment (fr, es, de, otherwise en) - confirm that the
 * consumer of this table handles it rather than falling back to en.
 * The last row (NULL text and pattern) closes the table.
 */
| 748 static const SearchData DIACRITICMATCH[] = { |
| 749 {"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\
u03BA\\u03B1\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, 5,-1}, {4, 3}}, |
| 750 {"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY
, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
| 751 {"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u030
2\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\
u0300\\u0020", |
| 752 "\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USE
ARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2,
1, 1, 1, 3, 2, 1, 3, 2}}, |
| 753 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}} |
| 754 }; |
| 755 |
| 756 #endif /* #if !UCONFIG_NO_COLLATION */ |
| 757 |
| 758 #endif |
OLD | NEW |