OLD | NEW |
| (Empty) |
1 Index: source/test/cintltst/usrchtst.c | |
2 =================================================================== | |
3 --- source/test/cintltst/usrchtst.c (revision 75773) | |
4 +++ source/test/cintltst/usrchtst.c (working copy) | |
5 @@ -1,5 +1,5 @@ | |
6 /******************************************************************** | |
7 - * Copyright (c) 2001-2010 International Business Machines | |
8 + * Copyright (c) 2001-2011 International Business Machines | |
9 * Corporation and others. All Rights Reserved. | |
10 ******************************************************************** | |
11 * File usrchtst.c | |
12 @@ -2553,7 +2553,173 @@ | |
13 ucol_close(coll); | |
14 } | |
15 | |
16 +/** | |
17 +* TestUsingSearchCollator | |
18 +*/ | |
19 | |
20 +#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0])) | |
21 + | |
22 +typedef struct { | |
23 + const UChar * pattern; | |
24 + const int32_t * offsets; | |
25 + int32_t offsetsLen; | |
26 +} PatternAndOffsets; | |
27 + | |
28 +static const UChar scKoText[] = { | |
29 + 0x0020, | |
30 +/*01*/ 0xAC00, 0x0020, /* simple LV Hangul */ | |
31 +/*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */ | |
32 +/*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for s
earch */ | |
33 +/*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands f
or search */ | |
34 +/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */ | |
35 +/*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining ja
mo (per search rules) */ | |
36 +/*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo
*/ | |
37 +/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; la
st expands for search */ | |
38 +/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining ja
mo; last expands for search */ | |
39 +/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; al
l expand for search */ | |
40 +/*34*/ 0x00E6, 0x0020, /* small letter ae, expands */ | |
41 +/*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and
acute, decomposes */ | |
42 + 0 | |
43 +}; | |
44 + | |
45 +static const UChar scKoPat0[] = { 0xAC01, 0 }; | |
46 +static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as con
joining jamo */ | |
47 +static const UChar scKoPat2[] = { 0xAC0F, 0 }; | |
48 +static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0
F as basic conjoining jamo */ | |
49 +static const UChar scKoPat4[] = { 0xAFFF, 0 }; | |
50 +static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as con
joining jamo */ | |
51 + | |
52 +static const int32_t scKoSrchOff01[] = { 3, 9, 13 }; | |
53 +static const int32_t scKoSrchOff23[] = { 5, 21, 25 }; | |
54 +static const int32_t scKoSrchOff45[] = { 7, 30 }; | |
55 + | |
56 +static const PatternAndOffsets scKoSrchPatternsOffsets[] = { | |
57 + { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) }, | |
58 + { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) }, | |
59 + { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) }, | |
60 + { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) }, | |
61 + { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) }, | |
62 + { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) }, | |
63 + { NULL, NULL, 0 } | |
64 +}; | |
65 + | |
66 +static const int32_t scKoStndOff01[] = { 3, 9 }; | |
67 +static const int32_t scKoStndOff2[] = { 5, 21 }; | |
68 +static const int32_t scKoStndOff3[] = { 25 }; | |
69 +static const int32_t scKoStndOff45[] = { 7, 30 }; | |
70 + | |
71 +static const PatternAndOffsets scKoStndPatternsOffsets[] = { | |
72 + { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) }, | |
73 + { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) }, | |
74 + { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) }, | |
75 + { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) }, | |
76 + { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) }, | |
77 + { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) }, | |
78 + { NULL, NULL, 0 } | |
79 +}; | |
80 + | |
81 +typedef struct { | |
82 + const char * locale; | |
83 + const UChar * text; | |
84 + const PatternAndOffsets * patternsAndOffsets; | |
85 +} TUSCItem; | |
86 + | |
87 +static const TUSCItem tuscItems[] = { | |
88 + { "root", scKoText, scKoStndPatternsOffsets }, | |
89 + { "root@collation=search", scKoText, scKoSrchPatternsOffsets }, | |
90 + { "ko@collation=search", scKoText, scKoSrchPatternsOffsets }, | |
91 + { NULL, NULL, NULL } | |
92 +}; | |
93 + | |
94 +static const UChar dummyPat[] = { 0x0061, 0 }; | |
95 + | |
96 +static void TestUsingSearchCollator(void) | |
97 +{ | |
98 + const TUSCItem * tuscItemPtr; | |
99 + for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) { | |
100 + UErrorCode status = U_ZERO_ERROR; | |
101 + UCollator* ucol = ucol_open(tuscItemPtr->locale, &status); | |
102 + if ( U_SUCCESS(status) ) { | |
103 + UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscI
temPtr->text, -1, ucol, NULL, &status); | |
104 + if ( U_SUCCESS(status) ) { | |
105 + const PatternAndOffsets * patternsOffsetsPtr; | |
106 + for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; pat
ternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) { | |
107 + usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1,
&status); | |
108 + if ( U_SUCCESS(status) ) { | |
109 + int32_t offset; | |
110 + const int32_t * nextOffsetPtr; | |
111 + const int32_t * limitOffsetPtr; | |
112 + | |
113 + usearch_reset(usrch); | |
114 + nextOffsetPtr = patternsOffsetsPtr->offsets; | |
115 + limitOffsetPtr = patternsOffsetsPtr->offsets + patterns
OffsetsPtr->offsetsLen; | |
116 + while (TRUE) { | |
117 + offset = usearch_next(usrch, &status); | |
118 + if ( U_FAILURE(status) || offset == USEARCH_DONE )
{ | |
119 + break; | |
120 + } | |
121 + if ( nextOffsetPtr < limitOffsetPtr ) { | |
122 + if (offset != *nextOffsetPtr) { | |
123 + log_err("error, locale %s, expected usearc
h_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset); | |
124 + nextOffsetPtr = limitOffsetPtr; | |
125 + break; | |
126 + } | |
127 + nextOffsetPtr++; | |
128 + } else { | |
129 + log_err("error, locale %s, usearch_next returne
d more matches than expected\n", tuscItemPtr->locale ); | |
130 + } | |
131 + } | |
132 + if ( U_FAILURE(status) ) { | |
133 + log_err("error, locale %s, usearch_next failed: %s\
n", tuscItemPtr->locale, u_errorName(status) ); | |
134 + } else if ( nextOffsetPtr < limitOffsetPtr ) { | |
135 + log_err("error, locale %s, usearch_next returned fe
wer matches than expected\n", tuscItemPtr->locale ); | |
136 + } | |
137 + | |
138 + status = U_ZERO_ERROR; | |
139 + usearch_reset(usrch); | |
140 + nextOffsetPtr = patternsOffsetsPtr->offsets + patternsO
ffsetsPtr->offsetsLen; | |
141 + limitOffsetPtr = patternsOffsetsPtr->offsets; | |
142 + while (TRUE) { | |
143 + offset = usearch_previous(usrch, &status); | |
144 + if ( U_FAILURE(status) || offset == USEARCH_DONE )
{ | |
145 + break; | |
146 + } | |
147 + if ( nextOffsetPtr > limitOffsetPtr ) { | |
148 + nextOffsetPtr--; | |
149 + if (offset != *nextOffsetPtr) { | |
150 + log_err("error, locale %s, expected usearc
h_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset); | |
151 + nextOffsetPtr = limitOffsetPtr; | |
152 + break; | |
153 + } | |
154 + } else { | |
155 + log_err("error, locale %s, usearch_previous ret
urned more matches than expected\n", tuscItemPtr->locale ); | |
156 + } | |
157 + } | |
158 + if ( U_FAILURE(status) ) { | |
159 + log_err("error, locale %s, usearch_previous failed:
%s\n", tuscItemPtr->locale, u_errorName(status) ); | |
160 + } else if ( nextOffsetPtr > limitOffsetPtr ) { | |
161 + log_err("error, locale %s, usearch_previous returne
d fewer matches than expected\n", tuscItemPtr->locale ); | |
162 + } | |
163 + | |
164 + } else { | |
165 + log_err("error, locale %s, usearch_setPattern failed: %
s\n", tuscItemPtr->locale, u_errorName(status) ); | |
166 + } | |
167 + } | |
168 + usearch_close(usrch); | |
169 + } else { | |
170 + log_err("error, locale %s, usearch_openFromCollator failed: %s\
n", tuscItemPtr->locale, u_errorName(status) ); | |
171 + } | |
172 + ucol_close(ucol); | |
173 + } else { | |
174 + log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->lo
cale, u_errorName(status) ); | |
175 + } | |
176 + } | |
177 +} | |
178 + | |
179 +/** | |
180 +* addSearchTest | |
181 +*/ | |
182 + | |
183 void addSearchTest(TestNode** root) | |
184 { | |
185 addTest(root, &TestStart, "tscoll/usrchtst/TestStart"); | |
186 @@ -2608,6 +2774,7 @@ | |
187 addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward"); | |
188 addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull"); | |
189 addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentica
l"); | |
190 + addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCol
lator"); | |
191 } | |
192 | |
193 #endif /* #if !UCONFIG_NO_COLLATION */ | |
194 Index: source/test/cintltst/citertst.c | |
195 =================================================================== | |
196 --- source/test/cintltst/citertst.c (revision 75773) | |
197 +++ source/test/cintltst/citertst.c (working copy) | |
198 @@ -1,6 +1,6 @@ | |
199 /******************************************************************** | |
200 * COPYRIGHT: | |
201 - * Copyright (c) 1997-2010, International Business Machines Corporation and | |
202 + * Copyright (c) 1997-2011, International Business Machines Corporation and | |
203 * others. All Rights Reserved. | |
204 ********************************************************************/ | |
205 /******************************************************************************
** | |
206 @@ -22,6 +22,7 @@ | |
207 #if !UCONFIG_NO_COLLATION | |
208 | |
209 #include "unicode/ucol.h" | |
210 +#include "unicode/ucoleitr.h" | |
211 #include "unicode/uloc.h" | |
212 #include "unicode/uchar.h" | |
213 #include "unicode/ustring.h" | |
214 @@ -58,6 +59,7 @@ | |
215 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow"
); | |
216 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); | |
217 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); | |
218 + addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchColla
torElements"); | |
219 } | |
220 | |
221 /* The locales we support */ | |
222 @@ -2017,4 +2019,141 @@ | |
223 T_FileStream_close(file); | |
224 } | |
225 | |
226 +/** | |
227 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) w
ith | |
228 +* normalization on AND jamo tailoring, among other things. | |
229 +*/ | |
230 +static const UChar tsceText[] = { /* Nothing in here should be ignorable */ | |
231 + 0x0020, 0xAC00, /* simple LV Hangul */ | |
232 + 0x0020, 0xAC01, /* simple LVT Hangul */ | |
233 + 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ | |
234 + 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search *
/ | |
235 + 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ | |
236 + 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ | |
237 + 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands
for search */ | |
238 + 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand fo
r search */ | |
239 + 0x0020, 0x00E6, /* small letter ae, expands */ | |
240 + 0x0020, 0x1E4D, /* small letter o with tilde and acute, dec
omposes */ | |
241 + 0x0020 | |
242 +}; | |
243 +enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; | |
244 + | |
245 +static const int32_t rootStandardOffsets[] = { | |
246 + 0, 1,2, | |
247 + 2, 3,4,4, | |
248 + 4, 5,6,6, | |
249 + 6, 7,8,8, | |
250 + 8, 9,10,11, | |
251 + 12, 13,14,15, | |
252 + 16, 17,18,19, | |
253 + 20, 21,22,23, | |
254 + 24, 25,26,26,26, | |
255 + 26, 27,28,28, | |
256 + 28, | |
257 + 29 | |
258 +}; | |
259 +enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStanda
rdOffsets[0]) }; | |
260 + | |
261 +static const int32_t rootSearchOffsets[] = { | |
262 + 0, 1,2, | |
263 + 2, 3,4,4, | |
264 + 4, 5,6,6,6, | |
265 + 6, 7,8,8,8,8,8,8, | |
266 + 8, 9,10,11, | |
267 + 12, 13,14,15, | |
268 + 16, 17,18,19,20, | |
269 + 20, 21,22,22,23,23,23,24, | |
270 + 24, 25,26,26,26, | |
271 + 26, 27,28,28, | |
272 + 28, | |
273 + 29 | |
274 +}; | |
275 +enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffs
ets[0]) }; | |
276 + | |
277 +typedef struct { | |
278 + const char * locale; | |
279 + const int32_t * offsets; | |
280 + int32_t offsetsLen; | |
281 +} TSCEItem; | |
282 + | |
283 +static const TSCEItem tsceItems[] = { | |
284 + { "root", rootStandardOffsets, kLen_rootStandardOffsets }, | |
285 + { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets }, | |
286 + { NULL, NULL, 0 } | |
287 +}; | |
288 + | |
289 +static void TestSearchCollatorElements(void) | |
290 +{ | |
291 + const TSCEItem * tsceItemPtr; | |
292 + for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) { | |
293 + UErrorCode status = U_ZERO_ERROR; | |
294 + UCollator* ucol = ucol_open(tsceItemPtr->locale, &status); | |
295 + if ( U_SUCCESS(status) ) { | |
296 + UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_t
sceText, &status); | |
297 + if ( U_SUCCESS(status) ) { | |
298 + int32_t offset, element; | |
299 + const int32_t * nextOffsetPtr; | |
300 + const int32_t * limitOffsetPtr; | |
301 + | |
302 + nextOffsetPtr = tsceItemPtr->offsets; | |
303 + limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen
; | |
304 + do { | |
305 + offset = ucol_getOffset(uce); | |
306 + element = ucol_next(uce, &status); | |
307 + if ( element == 0 ) { | |
308 + log_err("error, locale %s, ucol_next returned element 0
\n", tsceItemPtr->locale ); | |
309 + } | |
310 + if ( nextOffsetPtr < limitOffsetPtr ) { | |
311 + if (offset != *nextOffsetPtr) { | |
312 + log_err("error, locale %s, expected ucol_next -> uc
ol_getOffset %d, got %d\n", | |
313 + tsceItemPtr->locale
, *nextOffsetPtr, offset ); | |
314 + nextOffsetPtr = limitOffsetPtr; | |
315 + break; | |
316 + } | |
317 + nextOffsetPtr++; | |
318 + } else { | |
319 + log_err("error, locale %s, ucol_next returned more elem
ents than expected\n", tsceItemPtr->locale ); | |
320 + } | |
321 + } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); | |
322 + if ( nextOffsetPtr < limitOffsetPtr ) { | |
323 + log_err("error, locale %s, ucol_next returned fewer element
s than expected\n", tsceItemPtr->locale ); | |
324 + } | |
325 + | |
326 + ucol_setOffset(uce, kLen_tsceText, &status); | |
327 + status = U_ZERO_ERROR; | |
328 + nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; | |
329 + limitOffsetPtr = tsceItemPtr->offsets; | |
330 + do { | |
331 + offset = ucol_getOffset(uce); | |
332 + element = ucol_previous(uce, &status); | |
333 + if ( element == 0 ) { | |
334 + log_err("error, locale %s, ucol_previous returned eleme
nt 0\n", tsceItemPtr->locale ); | |
335 + } | |
336 + if ( nextOffsetPtr > limitOffsetPtr ) { | |
337 + nextOffsetPtr--; | |
338 + if (offset != *nextOffsetPtr) { | |
339 + log_err("error, locale %s, expected ucol_previous -
> ucol_getOffset %d, got %d\n", | |
340 + tsceItemPtr->lo
cale, *nextOffsetPtr, offset ); | |
341 + nextOffsetPtr = limitOffsetPtr; | |
342 + break; | |
343 + } | |
344 + } else { | |
345 + log_err("error, locale %s, ucol_previous returned more
elements than expected\n", tsceItemPtr->locale ); | |
346 + } | |
347 + } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); | |
348 + if ( nextOffsetPtr > limitOffsetPtr ) { | |
349 + log_err("error, locale %s, ucol_previous returned fewer ele
ments than expected\n", tsceItemPtr->locale ); | |
350 + } | |
351 + | |
352 + ucol_closeElements(uce); | |
353 + } else { | |
354 + log_err("error, locale %s, ucol_openElements failed: %s\n", tsc
eItemPtr->locale, u_errorName(status) ); | |
355 + } | |
356 + ucol_close(ucol); | |
357 + } else { | |
358 + log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->lo
cale, u_errorName(status) ); | |
359 + } | |
360 + } | |
361 +} | |
362 + | |
363 #endif /* #if !UCONFIG_NO_COLLATION */ | |
364 Index: source/test/cintltst/citertst.h | |
365 =================================================================== | |
366 --- source/test/cintltst/citertst.h (revision 75773) | |
367 +++ source/test/cintltst/citertst.h (working copy) | |
368 @@ -1,6 +1,6 @@ | |
369 /******************************************************************** | |
370 * COPYRIGHT: | |
371 - * Copyright (c) 1997-2008, International Business Machines Corporation and | |
372 + * Copyright (c) 1997-2008,2011, International Business Machines Corporation an
d | |
373 * others. All Rights Reserved. | |
374 ********************************************************************/ | |
375 /******************************************************************************
** | |
376 @@ -101,6 +101,11 @@ | |
377 * Bound checkings. | |
378 */ | |
379 static void TestSortKeyValidity(void); | |
380 +/** | |
381 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) w
ith | |
382 +* normalization on AND jamo tailoring, among other things. | |
383 +*/ | |
384 +static void TestSearchCollatorElements(void); | |
385 | |
386 /*------------------------------------------------------------------------ | |
387 Internal utilities | |
388 Index: source/i18n/ucol.cpp | |
389 =================================================================== | |
390 --- source/i18n/ucol.cpp (revision 75773) | |
391 +++ source/i18n/ucol.cpp (working copy) | |
392 @@ -1,6 +1,6 @@ | |
393 /* | |
394 ******************************************************************************* | |
395 -* Copyright (C) 1996-2010, International Business Machines | |
396 +* Copyright (C) 1996-2011, International Business Machines | |
397 * Corporation and others. All Rights Reserved. | |
398 ******************************************************************************* | |
399 * file name: ucol.cpp | |
400 @@ -1444,173 +1444,176 @@ | |
401 UChar ch = 0; | |
402 collationSource->offsetReturn = NULL; | |
403 | |
404 - for (;;) /* Loop handles case when incremental no
rmalize switches */ | |
405 - { /* to or from the side buffer / origin
al string, and we */ | |
406 - /* need to start again to get the next character. */ | |
407 + do { | |
408 + for (;;) /* Loop handles case when incrementa
l normalize switches */ | |
409 + { /* to or from the side buffer / or
iginal string, and we */ | |
410 + /* need to start again to get the next character. */ | |
411 | |
412 - if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF |
UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) | |
413 - { | |
414 - // The source string is null terminated and we're not working from
the side buffer, | |
415 - // and we're not normalizing. This is the fast path. | |
416 - // (We can be in the side buffer for Thai pre-vowel reordering ev
en when not normalizing.) | |
417 - ch = *collationSource->pos++; | |
418 - if (ch != 0) { | |
419 - break; | |
420 + if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMB
UF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) | |
421 + { | |
422 + // The source string is null terminated and we're not working f
rom the side buffer, | |
423 + // and we're not normalizing. This is the fast path. | |
424 + // (We can be in the side buffer for Thai pre-vowel reorderin
g even when not normalizing.) | |
425 + ch = *collationSource->pos++; | |
426 + if (ch != 0) { | |
427 + break; | |
428 + } | |
429 + else { | |
430 + return UCOL_NO_MORE_CES; | |
431 + } | |
432 } | |
433 - else { | |
434 - return UCOL_NO_MORE_CES; | |
435 - } | |
436 - } | |
437 | |
438 - if (collationSource->flags & UCOL_ITER_HASLEN) { | |
439 - // Normal path for strings when length is specified. | |
440 - // (We can't be in side buffer because it is always null terminat
ed.) | |
441 - if (collationSource->pos >= collationSource->endp) { | |
442 - // Ran off of the end of the main source string. We're done. | |
443 - return UCOL_NO_MORE_CES; | |
444 + if (collationSource->flags & UCOL_ITER_HASLEN) { | |
445 + // Normal path for strings when length is specified. | |
446 + // (We can't be in side buffer because it is always null term
inated.) | |
447 + if (collationSource->pos >= collationSource->endp) { | |
448 + // Ran off of the end of the main source string. We're don
e. | |
449 + return UCOL_NO_MORE_CES; | |
450 + } | |
451 + ch = *collationSource->pos++; | |
452 } | |
453 - ch = *collationSource->pos++; | |
454 - } | |
455 - else if(collationSource->flags & UCOL_USE_ITERATOR) { | |
456 - UChar32 iterCh = collationSource->iterator->next(collationSource->i
terator); | |
457 - if(iterCh == U_SENTINEL) { | |
458 - return UCOL_NO_MORE_CES; | |
459 - } | |
460 - ch = (UChar)iterCh; | |
461 - } | |
462 - else | |
463 - { | |
464 - // Null terminated string. | |
465 - ch = *collationSource->pos++; | |
466 - if (ch == 0) { | |
467 - // Ran off end of buffer. | |
468 - if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { | |
469 - // Ran off end of main string. backing up one character. | |
470 - collationSource->pos--; | |
471 + else if(collationSource->flags & UCOL_USE_ITERATOR) { | |
472 + UChar32 iterCh = collationSource->iterator->next(collationSourc
e->iterator); | |
473 + if(iterCh == U_SENTINEL) { | |
474 return UCOL_NO_MORE_CES; | |
475 } | |
476 - else | |
477 - { | |
478 - // Hit null in the normalize side buffer. | |
479 - // Usually this means the end of the normalized data, | |
480 - // except for one odd case: a null followed by combining ch
ars, | |
481 - // which is the case if we are at the start of the buffer
. | |
482 - if (collationSource->pos == collationSource->writableBuffer
.getBuffer()+1) { | |
483 - break; | |
484 + ch = (UChar)iterCh; | |
485 + } | |
486 + else | |
487 + { | |
488 + // Null terminated string. | |
489 + ch = *collationSource->pos++; | |
490 + if (ch == 0) { | |
491 + // Ran off end of buffer. | |
492 + if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { | |
493 + // Ran off end of main string. backing up one character
. | |
494 + collationSource->pos--; | |
495 + return UCOL_NO_MORE_CES; | |
496 } | |
497 + else | |
498 + { | |
499 + // Hit null in the normalize side buffer. | |
500 + // Usually this means the end of the normalized data, | |
501 + // except for one odd case: a null followed by combinin
g chars, | |
502 + // which is the case if we are at the start of the bu
ffer. | |
503 + if (collationSource->pos == collationSource->writableBu
ffer.getBuffer()+1) { | |
504 + break; | |
505 + } | |
506 | |
507 - // Null marked end of side buffer. | |
508 - // Revert to the main string and | |
509 - // loop back to top to try again to get a character. | |
510 - collationSource->pos = collationSource->fcdPosition; | |
511 - collationSource->flags = collationSource->origFlags; | |
512 - continue; | |
513 + // Null marked end of side buffer. | |
514 + // Revert to the main string and | |
515 + // loop back to top to try again to get a character. | |
516 + collationSource->pos = collationSource->fcdPosition; | |
517 + collationSource->flags = collationSource->origFlags; | |
518 + continue; | |
519 + } | |
520 } | |
521 } | |
522 - } | |
523 | |
524 - if(collationSource->flags&UCOL_HIRAGANA_Q) { | |
525 - /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the
flag | |
526 - * based on whether the previous codepoint was Hiragana or Katakana
. | |
527 - */ | |
528 - if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) |
| | |
529 - ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3
099 && ch <= 0x309C))) { | |
530 - collationSource->flags |= UCOL_WAS_HIRAGANA; | |
531 - } else { | |
532 - collationSource->flags &= ~UCOL_WAS_HIRAGANA; | |
533 + if(collationSource->flags&UCOL_HIRAGANA_Q) { | |
534 + /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set
the flag | |
535 + * based on whether the previous codepoint was Hiragana or Kata
kana. | |
536 + */ | |
537 + if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f
)) || | |
538 + ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >=
0x3099 && ch <= 0x309C))) { | |
539 + collationSource->flags |= UCOL_WAS_HIRAGANA; | |
540 + } else { | |
541 + collationSource->flags &= ~UCOL_WAS_HIRAGANA; | |
542 + } | |
543 } | |
544 - } | |
545 | |
546 - // We've got a character. See if there's any fcd and/or normalization
stuff to do. | |
547 - // Note that UCOL_ITER_NORM flag is always zero when we are in the s
ide buffer. | |
548 - if ((collationSource->flags & UCOL_ITER_NORM) == 0) { | |
549 - break; | |
550 - } | |
551 + // We've got a character. See if there's any fcd and/or normalizat
ion stuff to do. | |
552 + // Note that UCOL_ITER_NORM flag is always zero when we are in t
he side buffer. | |
553 + if ((collationSource->flags & UCOL_ITER_NORM) == 0) { | |
554 + break; | |
555 + } | |
556 | |
557 - if (collationSource->fcdPosition >= collationSource->pos) { | |
558 - // An earlier FCD check has already covered the current character. | |
559 - // We can go ahead and process this char. | |
560 - break; | |
561 - } | |
562 - | |
563 - if (ch < ZERO_CC_LIMIT_ ) { | |
564 - // Fast fcd safe path. Trailing combining class == 0. This char i
s OK. | |
565 - break; | |
566 - } | |
567 - | |
568 - if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
569 - // We need to peek at the next character in order to tell if we are
FCD | |
570 - if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource-
>pos >= collationSource->endp) { | |
571 - // We are at the last char of source string. | |
572 - // It is always OK for FCD check. | |
573 + if (collationSource->fcdPosition >= collationSource->pos) { | |
574 + // An earlier FCD check has already covered the current charact
er. | |
575 + // We can go ahead and process this char. | |
576 break; | |
577 } | |
578 | |
579 - // Not at last char of source string (or we'll check against termin
ating null). Do the FCD fast test | |
580 - if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
581 + if (ch < ZERO_CC_LIMIT_ ) { | |
582 + // Fast fcd safe path. Trailing combining class == 0. This ch
ar is OK. | |
583 break; | |
584 } | |
585 - } | |
586 | |
587 + if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
588 + // We need to peek at the next character in order to tell if we
are FCD | |
589 + if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSou
rce->pos >= collationSource->endp) { | |
590 + // We are at the last char of source string. | |
591 + // It is always OK for FCD check. | |
592 + break; | |
593 + } | |
594 | |
595 - // Need a more complete FCD check and possible normalization. | |
596 - if (collIterFCD(collationSource)) { | |
597 - collIterNormalize(collationSource); | |
598 - } | |
599 - if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { | |
600 - // No normalization was needed. Go ahead and process the char we
already had. | |
601 - break; | |
602 - } | |
603 + // Not at last char of source string (or we'll check against te
rminating null). Do the FCD fast test | |
604 + if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
605 + break; | |
606 + } | |
607 + } | |
608 | |
609 - // Some normalization happened. Next loop iteration will pick up a cha
r | |
610 - // from the normalization buffer. | |
611 | |
612 - } // end for (;;) | |
613 + // Need a more complete FCD check and possible normalization. | |
614 + if (collIterFCD(collationSource)) { | |
615 + collIterNormalize(collationSource); | |
616 + } | |
617 + if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { | |
618 + // No normalization was needed. Go ahead and process the char
we already had. | |
619 + break; | |
620 + } | |
621 | |
622 + // Some normalization happened. Next loop iteration will pick up a
char | |
623 + // from the normalization buffer. | |
624 | |
625 - if (ch <= 0xFF) { | |
626 - /* For latin-1 characters we never need to fall back to the UCA table
*/ | |
627 - /* because all of the UCA data is replicated in the latinOneMapping
array */ | |
628 - order = coll->latinOneMapping[ch]; | |
629 - if (order > UCOL_NOT_FOUND) { | |
630 - order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, sta
tus); | |
631 + } // end for (;;) | |
632 + | |
633 + | |
634 + if (ch <= 0xFF) { | |
635 + /* For latin-1 characters we never need to fall back to the UCA ta
ble */ | |
636 + /* because all of the UCA data is replicated in the latinOneMapp
ing array */ | |
637 + order = coll->latinOneMapping[ch]; | |
638 + if (order > UCOL_NOT_FOUND) { | |
639 + order = ucol_prv_getSpecialCE(coll, ch, order, collationSource,
status); | |
640 + } | |
641 } | |
642 - } | |
643 - else | |
644 - { | |
645 - // Always use UCA for Han, Hangul | |
646 - // (Han extension A is before main Han block) | |
647 - // **** Han compatibility chars ?? **** | |
648 - if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && | |
649 - (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { | |
650 - if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { | |
651 - // between the two target ranges; do normal lookup | |
652 - // **** this range is YI, Modifier tone letters, **** | |
653 - // **** Latin-D, Syloti Nagari, Phagas-pa. **** | |
654 - // **** Latin-D might be tailored, so we need to **** | |
655 - // **** do the normal lookup for these guys. **** | |
656 + else | |
657 + { | |
658 + // Always use UCA for Han, Hangul | |
659 + // (Han extension A is before main Han block) | |
660 + // **** Han compatibility chars ?? **** | |
661 + if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && | |
662 + (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { | |
663 + if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { | |
664 + // between the two target ranges; do normal lookup | |
665 + // **** this range is YI, Modifier tone letters, **** | |
666 + // **** Latin-D, Syloti Nagari, Phagas-pa. **** | |
667 + // **** Latin-D might be tailored, so we need to **** | |
668 + // **** do the normal lookup for these guys. **** | |
669 + order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); | |
670 + } else { | |
671 + // in one of the target ranges; use UCA | |
672 + order = UCOL_NOT_FOUND; | |
673 + } | |
674 + } else { | |
675 order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); | |
676 - } else { | |
677 - // in one of the target ranges; use UCA | |
678 - order = UCOL_NOT_FOUND; | |
679 } | |
680 - } else { | |
681 - order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); | |
682 - } | |
683 | |
684 - if(order > UCOL_NOT_FOUND) { /* i
f a CE is special */ | |
685 - order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, sta
tus); /* and try to get the special CE */ | |
686 - } | |
687 + if(order > UCOL_NOT_FOUND) {
/* if a CE is special */ | |
688 + order = ucol_prv_getSpecialCE(coll, ch, order, collationSource,
status); /* and try to get the special CE */ | |
689 + } | |
690 | |
691 - if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good
CE in the tailoring */ | |
692 - /* if we got here, the codepoint MUST be over 0xFF - so we look dir
ectly in the trie */ | |
693 - order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); | |
694 + if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a
good CE in the tailoring */ | |
695 + /* if we got here, the codepoint MUST be over 0xFF - so we look
directly in the trie */ | |
696 + order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); | |
697 | |
698 - if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */ | |
699 - order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSo
urce, status); | |
700 + if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE
*/ | |
701 + order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collati
onSource, status); | |
702 + } | |
703 } | |
704 } | |
705 - } | |
706 + } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_
LAST_HANGUL ); | |
707 + | |
708 if(order == UCOL_NOT_FOUND) { | |
709 order = getImplicit(ch, collationSource); | |
710 } | |
711 @@ -1958,161 +1961,163 @@ | |
712 else { | |
713 UChar ch = 0; | |
714 | |
715 - /* | |
716 - Loop handles case when incremental normalize switches to or from the | |
717 - side buffer / original string, and we need to start again to get the | |
718 - next character. | |
719 - */ | |
720 - for (;;) { | |
721 - if (data->flags & UCOL_ITER_HASLEN) { | |
722 - /* | |
723 - Normal path for strings when length is specified. | |
724 - Not in side buffer because it is always null terminated. | |
725 - */ | |
726 - if (data->pos <= data->string) { | |
727 - /* End of the main source string */ | |
728 - return UCOL_NO_MORE_CES; | |
729 - } | |
730 - data->pos --; | |
731 - ch = *data->pos; | |
732 - } | |
733 - // we are using an iterator to go back. Pray for us! | |
734 - else if (data->flags & UCOL_USE_ITERATOR) { | |
735 - UChar32 iterCh = data->iterator->previous(data->iterator); | |
736 - if(iterCh == U_SENTINEL) { | |
737 - return UCOL_NO_MORE_CES; | |
738 - } else { | |
739 - ch = (UChar)iterCh; | |
740 - } | |
741 - } | |
742 - else { | |
743 - data->pos --; | |
744 - ch = *data->pos; | |
745 - /* we are in the side buffer. */ | |
746 - if (ch == 0) { | |
747 + do { | |
748 + /* | |
749 + Loop handles case when incremental normalize switches to or from th
e | |
750 + side buffer / original string, and we need to start again to get th
e | |
751 + next character. | |
752 + */ | |
753 + for (;;) { | |
754 + if (data->flags & UCOL_ITER_HASLEN) { | |
755 /* | |
756 - At the start of the normalize side buffer. | |
757 - Go back to string. | |
758 - Because pointer points to the last accessed character, | |
759 - hence we have to increment it by one here. | |
760 + Normal path for strings when length is specified. | |
761 + Not in side buffer because it is always null terminated. | |
762 */ | |
763 - data->flags = data->origFlags; | |
764 - data->offsetRepeatValue = 0; | |
765 - | |
766 - if (data->fcdPosition == NULL) { | |
767 - data->pos = data->string; | |
768 + if (data->pos <= data->string) { | |
769 + /* End of the main source string */ | |
770 return UCOL_NO_MORE_CES; | |
771 } | |
772 - else { | |
773 - data->pos = data->fcdPosition + 1; | |
774 + data->pos --; | |
775 + ch = *data->pos; | |
776 + } | |
777 + // we are using an iterator to go back. Pray for us! | |
778 + else if (data->flags & UCOL_USE_ITERATOR) { | |
779 + UChar32 iterCh = data->iterator->previous(data->iterator); | |
780 + if(iterCh == U_SENTINEL) { | |
781 + return UCOL_NO_MORE_CES; | |
782 + } else { | |
783 + ch = (UChar)iterCh; | |
784 + } | |
785 + } | |
786 + else { | |
787 + data->pos --; | |
788 + ch = *data->pos; | |
789 + /* we are in the side buffer. */ | |
790 + if (ch == 0) { | |
791 + /* | |
792 + At the start of the normalize side buffer. | |
793 + Go back to string. | |
794 + Because pointer points to the last accessed character, | |
795 + hence we have to increment it by one here. | |
796 + */ | |
797 + data->flags = data->origFlags; | |
798 + data->offsetRepeatValue = 0; | |
799 + | |
800 + if (data->fcdPosition == NULL) { | |
801 + data->pos = data->string; | |
802 + return UCOL_NO_MORE_CES; | |
803 + } | |
804 + else { | |
805 + data->pos = data->fcdPosition + 1; | |
806 + } | |
807 + | |
808 + continue; | |
809 } | |
810 - | |
811 - continue; | |
812 } | |
813 - } | |
814 | |
815 - if(data->flags&UCOL_HIRAGANA_Q) { | |
816 - if(ch>=0x3040 && ch<=0x309f) { | |
817 - data->flags |= UCOL_WAS_HIRAGANA; | |
818 - } else { | |
819 - data->flags &= ~UCOL_WAS_HIRAGANA; | |
820 - } | |
821 - } | |
822 + if(data->flags&UCOL_HIRAGANA_Q) { | |
823 + if(ch>=0x3040 && ch<=0x309f) { | |
824 + data->flags |= UCOL_WAS_HIRAGANA; | |
825 + } else { | |
826 + data->flags &= ~UCOL_WAS_HIRAGANA; | |
827 + } | |
828 + } | |
829 | |
830 - /* | |
831 - * got a character to determine if there's fcd and/or normalization | |
832 - * stuff to do. | |
833 - * if the current character is not fcd. | |
834 - * if current character is at the start of the string | |
835 - * Trailing combining class == 0. | |
836 - * Note if pos is in the writablebuffer, norm is always 0 | |
837 - */ | |
838 - if (ch < ZERO_CC_LIMIT_ || | |
839 - // this should propel us out of the loop in the iterator case | |
840 - (data->flags & UCOL_ITER_NORM) == 0 || | |
841 - (data->fcdPosition != NULL && data->fcdPosition <= data->pos) | |
842 - || data->string == data->pos) { | |
843 - break; | |
844 - } | |
845 - | |
846 - if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
847 - /* if next character is FCD */ | |
848 - if (data->pos == data->string) { | |
849 - /* First char of string is always OK for FCD check */ | |
850 + /* | |
851 + * got a character to determine if there's fcd and/or normalizat
ion | |
852 + * stuff to do. | |
853 + * if the current character is not fcd. | |
854 + * if current character is at the start of the string | |
855 + * Trailing combining class == 0. | |
856 + * Note if pos is in the writablebuffer, norm is always 0 | |
857 + */ | |
858 + if (ch < ZERO_CC_LIMIT_ || | |
859 + // this should propel us out of the loop in the iterator case | |
860 + (data->flags & UCOL_ITER_NORM) == 0 || | |
861 + (data->fcdPosition != NULL && data->fcdPosition <= data->po
s) | |
862 + || data->string == data->pos) { | |
863 break; | |
864 } | |
865 | |
866 - /* Not first char of string, do the FCD fast test */ | |
867 - if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
868 + if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
869 + /* if next character is FCD */ | |
870 + if (data->pos == data->string) { | |
871 + /* First char of string is always OK for FCD check */ | |
872 + break; | |
873 + } | |
874 + | |
875 + /* Not first char of string, do the FCD fast test */ | |
876 + if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) { | |
877 + break; | |
878 + } | |
879 + } | |
880 + | |
881 + /* Need a more complete FCD check and possible normalization. *
/ | |
882 + if (collPrevIterFCD(data)) { | |
883 + collPrevIterNormalize(data); | |
884 + } | |
885 + | |
886 + if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { | |
887 + /* No normalization. Go ahead and process the char. */ | |
888 break; | |
889 } | |
890 - } | |
891 | |
892 - /* Need a more complete FCD check and possible normalization. */ | |
893 - if (collPrevIterFCD(data)) { | |
894 - collPrevIterNormalize(data); | |
895 + /* | |
896 + Some normalization happened. | |
897 + Next loop picks up a char from the normalization buffer. | |
898 + */ | |
899 } | |
900 | |
901 - if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { | |
902 - /* No normalization. Go ahead and process the char. */ | |
903 - break; | |
904 - } | |
905 - | |
906 - /* | |
907 - Some normalization happened. | |
908 - Next loop picks up a char from the normalization buffer. | |
909 + /* attempt to handle contractions, after removal of the backwards | |
910 + contraction | |
911 */ | |
912 - } | |
913 - | |
914 - /* attempt to handle contractions, after removal of the backwards | |
915 - contraction | |
916 - */ | |
917 - if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) { | |
918 - result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data
, status); | |
919 - } else { | |
920 - if (ch <= 0xFF) { | |
921 - result = coll->latinOneMapping[ch]; | |
922 - } | |
923 - else { | |
924 - // Always use UCA for [3400..9FFF], [AC00..D7AF] | |
925 - // **** [FA0E..FA2F] ?? **** | |
926 - if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && | |
927 - (ch >= 0x3400 && ch <= 0xD7AF)) { | |
928 - if (ch > 0x9FFF && ch < 0xAC00) { | |
929 - // between the two target ranges; do normal lookup | |
930 - // **** this range is YI, Modifier tone letters, **** | |
931 - // **** Latin-D, Syloti Nagari, Phagas-pa. **** | |
932 - // **** Latin-D might be tailored, so we need to **** | |
933 - // **** do the normal lookup for these guys. **** | |
934 - result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); | |
935 + if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data))
{ | |
936 + result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION,
data, status); | |
937 + } else { | |
938 + if (ch <= 0xFF) { | |
939 + result = coll->latinOneMapping[ch]; | |
940 + } | |
941 + else { | |
942 + // Always use UCA for [3400..9FFF], [AC00..D7AF] | |
943 + // **** [FA0E..FA2F] ?? **** | |
944 + if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && | |
945 + (ch >= 0x3400 && ch <= 0xD7AF)) { | |
946 + if (ch > 0x9FFF && ch < 0xAC00) { | |
947 + // between the two target ranges; do normal lookup | |
948 + // **** this range is YI, Modifier tone letters, **
** | |
949 + // **** Latin-D, Syloti Nagari, Phags-pa. **
** | |
950 + // **** Latin-D might be tailored, so we need to **
** | |
951 + // **** do the normal lookup for these guys. **
** | |
952 + result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch)
; | |
953 + } else { | |
954 + result = UCOL_NOT_FOUND; | |
955 + } | |
956 } else { | |
957 - result = UCOL_NOT_FOUND; | |
958 + result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); | |
959 } | |
960 - } else { | |
961 - result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); | |
962 } | |
963 - } | |
964 - if (result > UCOL_NOT_FOUND) { | |
965 - result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, stat
us); | |
966 - } | |
967 - if (result == UCOL_NOT_FOUND) { // Not found in master list | |
968 - if (!isAtStartPrevIterate(data) && | |
969 - ucol_contractionEndCP(ch, data->coll)) | |
970 - { | |
971 - result = UCOL_CONTRACTION; | |
972 - } else { | |
973 - if(coll->UCA) { | |
974 - result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch)
; | |
975 + if (result > UCOL_NOT_FOUND) { | |
976 + result = ucol_prv_getSpecialPrevCE(coll, ch, result, data,
status); | |
977 + } | |
978 + if (result == UCOL_NOT_FOUND) { // Not found in master list | |
979 + if (!isAtStartPrevIterate(data) && | |
980 + ucol_contractionEndCP(ch, data->coll)) | |
981 + { | |
982 + result = UCOL_CONTRACTION; | |
983 + } else { | |
984 + if(coll->UCA) { | |
985 + result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping,
ch); | |
986 + } | |
987 } | |
988 - } | |
989 | |
990 - if (result > UCOL_NOT_FOUND) { | |
991 - if(coll->UCA) { | |
992 - result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, resul
t, data, status); | |
993 + if (result > UCOL_NOT_FOUND) { | |
994 + if(coll->UCA) { | |
995 + result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, r
esult, data, status); | |
996 + } | |
997 } | |
998 } | |
999 } | |
1000 - } | |
1001 + } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <=
UCOL_LAST_HANGUL ); | |
1002 | |
1003 if(result == UCOL_NOT_FOUND) { | |
1004 result = getPrevImplicit(ch, data); | |
1005 @@ -3193,6 +3198,7 @@ | |
1006 // Since Hanguls pass the FCD check, it is | |
1007 // guaranteed that we won't be in | |
1008 // the normalization buffer if something like this happens | |
1009 + | |
1010 // However, if we are using a uchar iterator and normalizat
ion | |
1011 // is ON, the Hangul that lead us here is going to be in th
at | |
1012 // normalization buffer. Here we want to restore the uchar | |
1013 @@ -3201,6 +3207,7 @@ | |
1014 source->flags = source->origFlags; // restore the itera
tor | |
1015 source->pos = NULL; | |
1016 } | |
1017 + | |
1018 // Move Jamos into normalization buffer | |
1019 UChar *buffer = source->writableBuffer.getBuffer(4); | |
1020 int32_t bufferLength; | |
1021 @@ -3214,8 +3221,9 @@ | |
1022 } | |
1023 source->writableBuffer.releaseBuffer(bufferLength); | |
1024 | |
1025 - source->fcdPosition = source->pos; // Indicate wher
e to continue in main input string | |
1026 - // after exhausting the writableBuffer | |
1027 + // Indicate where to continue in main input string after ex
hausting the writableBuffer | |
1028 + source->fcdPosition = source->pos; | |
1029 + | |
1030 source->pos = source->writableBuffer.getTerminatedBuffer(
); | |
1031 source->origFlags = source->flags; | |
1032 source->flags |= UCOL_ITER_INNORMBUF; | |
1033 @@ -3966,13 +3974,10 @@ | |
1034 // Since Hanguls pass the FCD check, it is | |
1035 // guaranteed that we won't be in | |
1036 // the normalization buffer if something like this happens | |
1037 + | |
1038 // Move Jamos into normalization buffer | |
1039 - /* | |
1040 - Move the Jamos into the | |
1041 - normalization buffer | |
1042 - */ | |
1043 UChar *tempbuffer = source->writableBuffer.getBuffer(5); | |
1044 - int32_t tempbufferLength; | |
1045 + int32_t tempbufferLength, jamoOffset; | |
1046 tempbuffer[0] = 0; | |
1047 tempbuffer[1] = (UChar)L; | |
1048 tempbuffer[2] = (UChar)V; | |
1049 @@ -3984,16 +3989,30 @@ | |
1050 } | |
1051 source->writableBuffer.releaseBuffer(tempbufferLength); | |
1052 | |
1053 - /* | |
1054 - Indicate where to continue in main input string after exhau
sting | |
1055 - the writableBuffer | |
1056 - */ | |
1057 + // Indicate where to continue in main input string after ex
hausting the writableBuffer | |
1058 if (source->pos == source->string) { | |
1059 + jamoOffset = 0; | |
1060 source->fcdPosition = NULL; | |
1061 } else { | |
1062 + jamoOffset = source->pos - source->string; | |
1063 source->fcdPosition = source->pos-1; | |
1064 } | |
1065 + | |
1066 + // Append offsets for the additional cha
rs | |
1067 + // (not the 0, and not the L whose offse
ts match the original Hangul) | |
1068 + int32_t jamoRemaining = tempbufferLength - 2; | |
1069 + jamoOffset++; // appended offsets should match end of origi
nal Hangul | |
1070 + while (jamoRemaining-- > 0) { | |
1071 + source->appendOffset(jamoOffset, *status); | |
1072 + } | |
1073 | |
1074 + source->offsetRepeatValue = jamoOffset; | |
1075 + | |
1076 + source->offsetReturn = source->offsetStore - 1; | |
1077 + if (source->offsetReturn == source->offsetBuffer) { | |
1078 + source->offsetStore = source->offsetBuffer; | |
1079 + } | |
1080 + | |
1081 source->pos = source->writableBuffer.getTermi
natedBuffer() + tempbufferLength; | |
1082 source->origFlags = source->flags; | |
1083 source->flags |= UCOL_ITER_INNORMBUF; | |
OLD | NEW |