Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: icu52/patches/search_collation.patch

Issue 224943002: icu local change part1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/
Patch Set: function indentation changed Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « icu52/patches/rtti.patch ('k') | icu52/patches/segmentation.patch » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 Index: source/test/cintltst/usrchtst.c
2 ===================================================================
3 --- source/test/cintltst/usrchtst.c (revision 75773)
4 +++ source/test/cintltst/usrchtst.c (working copy)
5 @@ -1,5 +1,5 @@
6 /********************************************************************
7 - * Copyright (c) 2001-2010 International Business Machines
8 + * Copyright (c) 2001-2011 International Business Machines
9 * Corporation and others. All Rights Reserved.
10 ********************************************************************
11 * File usrchtst.c
12 @@ -2553,7 +2553,173 @@
13 ucol_close(coll);
14 }
15
16 +/**
17 +* TestUsingSearchCollator
18 +*/
19
20 +#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0]))
21 +
22 +typedef struct {
23 + const UChar * pattern;
24 + const int32_t * offsets;
25 + int32_t offsetsLen;
26 +} PatternAndOffsets;
27 +
28 +static const UChar scKoText[] = {
29 + 0x0020,
30 +/*01*/ 0xAC00, 0x0020, /* simple LV Hangul */
31 +/*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */
32 +/*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */
33 +/*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */
34 +/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */
35 +/*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */
36 +/*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */
37 +/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */
38 +/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */
39 +/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */
40 +/*34*/ 0x00E6, 0x0020, /* small letter ae, expands */
41 +/*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */
42 + 0
43 +};
44 +
45 +static const UChar scKoPat0[] = { 0xAC01, 0 };
46 +static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */
47 +static const UChar scKoPat2[] = { 0xAC0F, 0 };
48 +static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */
49 +static const UChar scKoPat4[] = { 0xAFFF, 0 };
50 +static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */
51 +
52 +static const int32_t scKoSrchOff01[] = { 3, 9, 13 };
53 +static const int32_t scKoSrchOff23[] = { 5, 21, 25 };
54 +static const int32_t scKoSrchOff45[] = { 7, 30 };
55 +
56 +static const PatternAndOffsets scKoSrchPatternsOffsets[] = {
57 + { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
58 + { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
59 + { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
60 + { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
61 + { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
62 + { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
63 + { NULL, NULL, 0 }
64 +};
65 +
66 +static const int32_t scKoStndOff01[] = { 3, 9 };
67 +static const int32_t scKoStndOff2[] = { 5, 21 };
68 +static const int32_t scKoStndOff3[] = { 25 };
69 +static const int32_t scKoStndOff45[] = { 7, 30 };
70 +
71 +static const PatternAndOffsets scKoStndPatternsOffsets[] = {
72 + { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
73 + { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
74 + { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) },
75 + { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) },
76 + { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
77 + { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
78 + { NULL, NULL, 0 }
79 +};
80 +
81 +typedef struct {
82 + const char * locale;
83 + const UChar * text;
84 + const PatternAndOffsets * patternsAndOffsets;
85 +} TUSCItem;
86 +
87 +static const TUSCItem tuscItems[] = {
88 + { "root", scKoText, scKoStndPatternsOffsets },
89 + { "root@collation=search", scKoText, scKoSrchPatternsOffsets },
90 + { "ko@collation=search", scKoText, scKoSrchPatternsOffsets },
91 + { NULL, NULL, NULL }
92 +};
93 +
94 +static const UChar dummyPat[] = { 0x0061, 0 };
95 +
96 +static void TestUsingSearchCollator(void)
97 +{
98 + const TUSCItem * tuscItemPtr;
99 + for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) {
100 + UErrorCode status = U_ZERO_ERROR;
101 + UCollator* ucol = ucol_open(tuscItemPtr->locale, &status);
102 + if ( U_SUCCESS(status) ) {
103 + UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscI temPtr->text, -1, ucol, NULL, &status);
104 + if ( U_SUCCESS(status) ) {
105 + const PatternAndOffsets * patternsOffsetsPtr;
106 + for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; pat ternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) {
107 + usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status);
108 + if ( U_SUCCESS(status) ) {
109 + int32_t offset;
110 + const int32_t * nextOffsetPtr;
111 + const int32_t * limitOffsetPtr;
112 +
113 + usearch_reset(usrch);
114 + nextOffsetPtr = patternsOffsetsPtr->offsets;
115 + limitOffsetPtr = patternsOffsetsPtr->offsets + patterns OffsetsPtr->offsetsLen;
116 + while (TRUE) {
117 + offset = usearch_next(usrch, &status);
118 + if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
119 + break;
120 + }
121 + if ( nextOffsetPtr < limitOffsetPtr ) {
122 + if (offset != *nextOffsetPtr) {
123 + log_err("error, locale %s, expected usearc h_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
124 + nextOffsetPtr = limitOffsetPtr;
125 + break;
126 + }
127 + nextOffsetPtr++;
128 + } else {
129 + log_err("error, locale %s, usearch_next returne d more matches than expected\n", tuscItemPtr->locale );
130 + }
131 + }
132 + if ( U_FAILURE(status) ) {
133 + log_err("error, locale %s, usearch_next failed: %s\ n", tuscItemPtr->locale, u_errorName(status) );
134 + } else if ( nextOffsetPtr < limitOffsetPtr ) {
135 + log_err("error, locale %s, usearch_next returned fe wer matches than expected\n", tuscItemPtr->locale );
136 + }
137 +
138 + status = U_ZERO_ERROR;
139 + usearch_reset(usrch);
140 + nextOffsetPtr = patternsOffsetsPtr->offsets + patternsO ffsetsPtr->offsetsLen;
141 + limitOffsetPtr = patternsOffsetsPtr->offsets;
142 + while (TRUE) {
143 + offset = usearch_previous(usrch, &status);
144 + if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
145 + break;
146 + }
147 + if ( nextOffsetPtr > limitOffsetPtr ) {
148 + nextOffsetPtr--;
149 + if (offset != *nextOffsetPtr) {
150 + log_err("error, locale %s, expected usearc h_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
151 + nextOffsetPtr = limitOffsetPtr;
152 + break;
153 + }
154 + } else {
155 + log_err("error, locale %s, usearch_previous ret urned more matches than expected\n", tuscItemPtr->locale );
156 + }
157 + }
158 + if ( U_FAILURE(status) ) {
159 + log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
160 + } else if ( nextOffsetPtr > limitOffsetPtr ) {
161 + log_err("error, locale %s, usearch_previous returne d fewer matches than expected\n", tuscItemPtr->locale );
162 + }
163 +
164 + } else {
165 + log_err("error, locale %s, usearch_setPattern failed: % s\n", tuscItemPtr->locale, u_errorName(status) );
166 + }
167 + }
168 + usearch_close(usrch);
169 + } else {
170 + log_err("error, locale %s, usearch_openFromCollator failed: %s\ n", tuscItemPtr->locale, u_errorName(status) );
171 + }
172 + ucol_close(ucol);
173 + } else {
174 + log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->lo cale, u_errorName(status) );
175 + }
176 + }
177 +}
178 +
179 +/**
180 +* addSearchTest
181 +*/
182 +
183 void addSearchTest(TestNode** root)
184 {
185 addTest(root, &TestStart, "tscoll/usrchtst/TestStart");
186 @@ -2608,6 +2774,7 @@
187 addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");
188 addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
189 addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentica l");
190 + addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCol lator");
191 }
192
193 #endif /* #if !UCONFIG_NO_COLLATION */
194 Index: source/test/cintltst/citertst.c
195 ===================================================================
196 --- source/test/cintltst/citertst.c (revision 75773)
197 +++ source/test/cintltst/citertst.c (working copy)
198 @@ -1,6 +1,6 @@
199 /********************************************************************
200 * COPYRIGHT:
201 - * Copyright (c) 1997-2010, International Business Machines Corporation and
202 + * Copyright (c) 1997-2011, International Business Machines Corporation and
203 * others. All Rights Reserved.
204 ********************************************************************/
205 /****************************************************************************** **
206 @@ -22,6 +22,7 @@
207 #if !UCONFIG_NO_COLLATION
208
209 #include "unicode/ucol.h"
210 +#include "unicode/ucoleitr.h"
211 #include "unicode/uloc.h"
212 #include "unicode/uchar.h"
213 #include "unicode/ustring.h"
214 @@ -58,6 +59,7 @@
215 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow" );
216 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
217 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
218 + addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchColla torElements");
219 }
220
221 /* The locales we support */
222 @@ -2017,4 +2019,141 @@
223 T_FileStream_close(file);
224 }
225
226 +/**
227 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
228 +* normalization on AND jamo tailoring, among other things.
229 +*/
230 +static const UChar tsceText[] = { /* Nothing in here should be ignorable */
231 + 0x0020, 0xAC00, /* simple LV Hangul */
232 + 0x0020, 0xAC01, /* simple LVT Hangul */
233 + 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
234 + 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
235 + 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
236 + 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
237 + 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
238 + 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
239 + 0x0020, 0x00E6, /* small letter ae, expands */
240 + 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
241 + 0x0020
242 +};
243 +enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
244 +
245 +static const int32_t rootStandardOffsets[] = {
246 + 0, 1,2,
247 + 2, 3,4,4,
248 + 4, 5,6,6,
249 + 6, 7,8,8,
250 + 8, 9,10,11,
251 + 12, 13,14,15,
252 + 16, 17,18,19,
253 + 20, 21,22,23,
254 + 24, 25,26,26,26,
255 + 26, 27,28,28,
256 + 28,
257 + 29
258 +};
259 +enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStanda rdOffsets[0]) };
260 +
261 +static const int32_t rootSearchOffsets[] = {
262 + 0, 1,2,
263 + 2, 3,4,4,
264 + 4, 5,6,6,6,
265 + 6, 7,8,8,8,8,8,8,
266 + 8, 9,10,11,
267 + 12, 13,14,15,
268 + 16, 17,18,19,20,
269 + 20, 21,22,22,23,23,23,24,
270 + 24, 25,26,26,26,
271 + 26, 27,28,28,
272 + 28,
273 + 29
274 +};
275 +enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffs ets[0]) };
276 +
277 +typedef struct {
278 + const char * locale;
279 + const int32_t * offsets;
280 + int32_t offsetsLen;
281 +} TSCEItem;
282 +
283 +static const TSCEItem tsceItems[] = {
284 + { "root", rootStandardOffsets, kLen_rootStandardOffsets },
285 + { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
286 + { NULL, NULL, 0 }
287 +};
288 +
289 +static void TestSearchCollatorElements(void)
290 +{
291 + const TSCEItem * tsceItemPtr;
292 + for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
293 + UErrorCode status = U_ZERO_ERROR;
294 + UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
295 + if ( U_SUCCESS(status) ) {
296 + UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_t sceText, &status);
297 + if ( U_SUCCESS(status) ) {
298 + int32_t offset, element;
299 + const int32_t * nextOffsetPtr;
300 + const int32_t * limitOffsetPtr;
301 +
302 + nextOffsetPtr = tsceItemPtr->offsets;
303 + limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen ;
304 + do {
305 + offset = ucol_getOffset(uce);
306 + element = ucol_next(uce, &status);
307 + if ( element == 0 ) {
308 + log_err("error, locale %s, ucol_next returned element 0 \n", tsceItemPtr->locale );
309 + }
310 + if ( nextOffsetPtr < limitOffsetPtr ) {
311 + if (offset != *nextOffsetPtr) {
312 + log_err("error, locale %s, expected ucol_next -> uc ol_getOffset %d, got %d\n",
313 + tsceItemPtr->locale , *nextOffsetPtr, offset );
314 + nextOffsetPtr = limitOffsetPtr;
315 + break;
316 + }
317 + nextOffsetPtr++;
318 + } else {
319 + log_err("error, locale %s, ucol_next returned more elem ents than expected\n", tsceItemPtr->locale );
320 + }
321 + } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
322 + if ( nextOffsetPtr < limitOffsetPtr ) {
323 + log_err("error, locale %s, ucol_next returned fewer element s than expected\n", tsceItemPtr->locale );
324 + }
325 +
326 + ucol_setOffset(uce, kLen_tsceText, &status);
327 + status = U_ZERO_ERROR;
328 + nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
329 + limitOffsetPtr = tsceItemPtr->offsets;
330 + do {
331 + offset = ucol_getOffset(uce);
332 + element = ucol_previous(uce, &status);
333 + if ( element == 0 ) {
334 + log_err("error, locale %s, ucol_previous returned eleme nt 0\n", tsceItemPtr->locale );
335 + }
336 + if ( nextOffsetPtr > limitOffsetPtr ) {
337 + nextOffsetPtr--;
338 + if (offset != *nextOffsetPtr) {
339 + log_err("error, locale %s, expected ucol_previous - > ucol_getOffset %d, got %d\n",
340 + tsceItemPtr->lo cale, *nextOffsetPtr, offset );
341 + nextOffsetPtr = limitOffsetPtr;
342 + break;
343 + }
344 + } else {
345 + log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
346 + }
347 + } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
348 + if ( nextOffsetPtr > limitOffsetPtr ) {
349 + log_err("error, locale %s, ucol_previous returned fewer ele ments than expected\n", tsceItemPtr->locale );
350 + }
351 +
352 + ucol_closeElements(uce);
353 + } else {
354 + log_err("error, locale %s, ucol_openElements failed: %s\n", tsc eItemPtr->locale, u_errorName(status) );
355 + }
356 + ucol_close(ucol);
357 + } else {
358 + log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->lo cale, u_errorName(status) );
359 + }
360 + }
361 +}
362 +
363 #endif /* #if !UCONFIG_NO_COLLATION */
364 Index: source/test/cintltst/citertst.h
365 ===================================================================
366 --- source/test/cintltst/citertst.h (revision 75773)
367 +++ source/test/cintltst/citertst.h (working copy)
368 @@ -1,6 +1,6 @@
369 /********************************************************************
370 * COPYRIGHT:
371 - * Copyright (c) 1997-2008, International Business Machines Corporation and
372 + * Copyright (c) 1997-2008,2011, International Business Machines Corporation and
373 * others. All Rights Reserved.
374 ********************************************************************/
375 /****************************************************************************** **
376 @@ -101,6 +101,11 @@
377 * Bound checkings.
378 */
379 static void TestSortKeyValidity(void);
380 +/**
381 +* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
382 +* normalization on AND jamo tailoring, among other things.
383 +*/
384 +static void TestSearchCollatorElements(void);
385
386 /*------------------------------------------------------------------------
387 Internal utilities
388 Index: source/i18n/ucol.cpp
389 ===================================================================
390 --- source/i18n/ucol.cpp (revision 75773)
391 +++ source/i18n/ucol.cpp (working copy)
392 @@ -1,6 +1,6 @@
393 /*
394 *******************************************************************************
395 -* Copyright (C) 1996-2010, International Business Machines
396 +* Copyright (C) 1996-2011, International Business Machines
397 * Corporation and others. All Rights Reserved.
398 *******************************************************************************
399 * file name: ucol.cpp
400 @@ -1444,173 +1444,176 @@
401 UChar ch = 0;
402 collationSource->offsetReturn = NULL;
403
404 - for (;;) /* Loop handles case when incremental no rmalize switches */
405 - { /* to or from the side buffer / origin al string, and we */
406 - /* need to start again to get the next character. */
407 + do {
408 + for (;;) /* Loop handles case when incrementa l normalize switches */
409 + { /* to or from the side buffer / or iginal string, and we */
410 + /* need to start again to get the next character. */
411
412 - if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
413 - {
414 - // The source string is null terminated and we're not working from the side buffer,
415 - // and we're not normalizing. This is the fast path.
416 - // (We can be in the side buffer for Thai pre-vowel reordering ev en when not normalizing.)
417 - ch = *collationSource->pos++;
418 - if (ch != 0) {
419 - break;
420 + if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMB UF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
421 + {
422 + // The source string is null terminated and we're not working f rom the side buffer,
423 + // and we're not normalizing. This is the fast path.
424 + // (We can be in the side buffer for Thai pre-vowel reorderin g even when not normalizing.)
425 + ch = *collationSource->pos++;
426 + if (ch != 0) {
427 + break;
428 + }
429 + else {
430 + return UCOL_NO_MORE_CES;
431 + }
432 }
433 - else {
434 - return UCOL_NO_MORE_CES;
435 - }
436 - }
437
438 - if (collationSource->flags & UCOL_ITER_HASLEN) {
439 - // Normal path for strings when length is specified.
440 - // (We can't be in side buffer because it is always null terminat ed.)
441 - if (collationSource->pos >= collationSource->endp) {
442 - // Ran off of the end of the main source string. We're done.
443 - return UCOL_NO_MORE_CES;
444 + if (collationSource->flags & UCOL_ITER_HASLEN) {
445 + // Normal path for strings when length is specified.
446 + // (We can't be in side buffer because it is always null term inated.)
447 + if (collationSource->pos >= collationSource->endp) {
448 + // Ran off of the end of the main source string. We're don e.
449 + return UCOL_NO_MORE_CES;
450 + }
451 + ch = *collationSource->pos++;
452 }
453 - ch = *collationSource->pos++;
454 - }
455 - else if(collationSource->flags & UCOL_USE_ITERATOR) {
456 - UChar32 iterCh = collationSource->iterator->next(collationSource->i terator);
457 - if(iterCh == U_SENTINEL) {
458 - return UCOL_NO_MORE_CES;
459 - }
460 - ch = (UChar)iterCh;
461 - }
462 - else
463 - {
464 - // Null terminated string.
465 - ch = *collationSource->pos++;
466 - if (ch == 0) {
467 - // Ran off end of buffer.
468 - if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
469 - // Ran off end of main string. backing up one character.
470 - collationSource->pos--;
471 + else if(collationSource->flags & UCOL_USE_ITERATOR) {
472 + UChar32 iterCh = collationSource->iterator->next(collationSourc e->iterator);
473 + if(iterCh == U_SENTINEL) {
474 return UCOL_NO_MORE_CES;
475 }
476 - else
477 - {
478 - // Hit null in the normalize side buffer.
479 - // Usually this means the end of the normalized data,
480 - // except for one odd case: a null followed by combining ch ars,
481 - // which is the case if we are at the start of the buffer .
482 - if (collationSource->pos == collationSource->writableBuffer .getBuffer()+1) {
483 - break;
484 + ch = (UChar)iterCh;
485 + }
486 + else
487 + {
488 + // Null terminated string.
489 + ch = *collationSource->pos++;
490 + if (ch == 0) {
491 + // Ran off end of buffer.
492 + if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
493 + // Ran off end of main string. backing up one character .
494 + collationSource->pos--;
495 + return UCOL_NO_MORE_CES;
496 }
497 + else
498 + {
499 + // Hit null in the normalize side buffer.
500 + // Usually this means the end of the normalized data,
501 + // except for one odd case: a null followed by combinin g chars,
502 + // which is the case if we are at the start of the bu ffer.
503 + if (collationSource->pos == collationSource->writableBu ffer.getBuffer()+1) {
504 + break;
505 + }
506
507 - // Null marked end of side buffer.
508 - // Revert to the main string and
509 - // loop back to top to try again to get a character.
510 - collationSource->pos = collationSource->fcdPosition;
511 - collationSource->flags = collationSource->origFlags;
512 - continue;
513 + // Null marked end of side buffer.
514 + // Revert to the main string and
515 + // loop back to top to try again to get a character.
516 + collationSource->pos = collationSource->fcdPosition;
517 + collationSource->flags = collationSource->origFlags;
518 + continue;
519 + }
520 }
521 }
522 - }
523
524 - if(collationSource->flags&UCOL_HIRAGANA_Q) {
525 - /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
526 - * based on whether the previous codepoint was Hiragana or Katakana .
527 - */
528 - if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) | |
529 - ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3 099 && ch <= 0x309C))) {
530 - collationSource->flags |= UCOL_WAS_HIRAGANA;
531 - } else {
532 - collationSource->flags &= ~UCOL_WAS_HIRAGANA;
533 + if(collationSource->flags&UCOL_HIRAGANA_Q) {
534 + /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
535 + * based on whether the previous codepoint was Hiragana or Kata kana.
536 + */
537 + if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f )) ||
538 + ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
539 + collationSource->flags |= UCOL_WAS_HIRAGANA;
540 + } else {
541 + collationSource->flags &= ~UCOL_WAS_HIRAGANA;
542 + }
543 }
544 - }
545
546 - // We've got a character. See if there's any fcd and/or normalization stuff to do.
547 - // Note that UCOL_ITER_NORM flag is always zero when we are in the s ide buffer.
548 - if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
549 - break;
550 - }
551 + // We've got a character. See if there's any fcd and/or normalizat ion stuff to do.
552 + // Note that UCOL_ITER_NORM flag is always zero when we are in t he side buffer.
553 + if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
554 + break;
555 + }
556
557 - if (collationSource->fcdPosition >= collationSource->pos) {
558 - // An earlier FCD check has already covered the current character.
559 - // We can go ahead and process this char.
560 - break;
561 - }
562 -
563 - if (ch < ZERO_CC_LIMIT_ ) {
564 - // Fast fcd safe path. Trailing combining class == 0. This char i s OK.
565 - break;
566 - }
567 -
568 - if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
569 - // We need to peek at the next character in order to tell if we are FCD
570 - if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource- >pos >= collationSource->endp) {
571 - // We are at the last char of source string.
572 - // It is always OK for FCD check.
573 + if (collationSource->fcdPosition >= collationSource->pos) {
574 + // An earlier FCD check has already covered the current charact er.
575 + // We can go ahead and process this char.
576 break;
577 }
578
579 - // Not at last char of source string (or we'll check against termin ating null). Do the FCD fast test
580 - if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
581 + if (ch < ZERO_CC_LIMIT_ ) {
582 + // Fast fcd safe path. Trailing combining class == 0. This ch ar is OK.
583 break;
584 }
585 - }
586
587 + if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
588 + // We need to peek at the next character in order to tell if we are FCD
589 + if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSou rce->pos >= collationSource->endp) {
590 + // We are at the last char of source string.
591 + // It is always OK for FCD check.
592 + break;
593 + }
594
595 - // Need a more complete FCD check and possible normalization.
596 - if (collIterFCD(collationSource)) {
597 - collIterNormalize(collationSource);
598 - }
599 - if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
600 - // No normalization was needed. Go ahead and process the char we already had.
601 - break;
602 - }
603 + // Not at last char of source string (or we'll check against te rminating null). Do the FCD fast test
604 + if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
605 + break;
606 + }
607 + }
608
609 - // Some normalization happened. Next loop iteration will pick up a cha r
610 - // from the normalization buffer.
611
612 - } // end for (;;)
613 + // Need a more complete FCD check and possible normalization.
614 + if (collIterFCD(collationSource)) {
615 + collIterNormalize(collationSource);
616 + }
617 + if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
618 + // No normalization was needed. Go ahead and process the char we already had.
619 + break;
620 + }
621
622 + // Some normalization happened. Next loop iteration will pick up a char
623 + // from the normalization buffer.
624
625 - if (ch <= 0xFF) {
626 - /* For latin-1 characters we never need to fall back to the UCA table */
627 - /* because all of the UCA data is replicated in the latinOneMapping array */
628 - order = coll->latinOneMapping[ch];
629 - if (order > UCOL_NOT_FOUND) {
630 - order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, sta tus);
631 + } // end for (;;)
632 +
633 +
634 + if (ch <= 0xFF) {
635 + /* For latin-1 characters we never need to fall back to the UCA ta ble */
636 + /* because all of the UCA data is replicated in the latinOneMapp ing array */
637 + order = coll->latinOneMapping[ch];
638 + if (order > UCOL_NOT_FOUND) {
639 + order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
640 + }
641 }
642 - }
643 - else
644 - {
645 - // Always use UCA for Han, Hangul
646 - // (Han extension A is before main Han block)
647 - // **** Han compatibility chars ?? ****
648 - if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
649 - (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
650 - if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
651 - // between the two target ranges; do normal lookup
652 - // **** this range is YI, Modifier tone letters, ****
653 - // **** Latin-D, Syloti Nagari, Phagas-pa. ****
654 - // **** Latin-D might be tailored, so we need to ****
655 - // **** do the normal lookup for these guys. ****
656 + else
657 + {
658 + // Always use UCA for Han, Hangul
659 + // (Han extension A is before main Han block)
660 + // **** Han compatibility chars ?? ****
661 + if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
662 + (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
663 + if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
664 + // between the two target ranges; do normal lookup
665 + // **** this range is YI, Modifier tone letters, ****
666 + // **** Latin-D, Syloti Nagari, Phagas-pa. ****
667 + // **** Latin-D might be tailored, so we need to ****
668 + // **** do the normal lookup for these guys. ****
669 + order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
670 + } else {
671 + // in one of the target ranges; use UCA
672 + order = UCOL_NOT_FOUND;
673 + }
674 + } else {
675 order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
676 - } else {
677 - // in one of the target ranges; use UCA
678 - order = UCOL_NOT_FOUND;
679 }
680 - } else {
681 - order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
682 - }
683
684 - if(order > UCOL_NOT_FOUND) { /* i f a CE is special */
685 - order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, sta tus); /* and try to get the special CE */
686 - }
687 + if(order > UCOL_NOT_FOUND) { /* if a CE is special */
688 + order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status); /* and try to get the special CE */
689 + }
690
691 - if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */
692 - /* if we got here, the codepoint MUST be over 0xFF - so we look dir ectly in the trie */
693 - order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
694 + if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */
695 + /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
696 + order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
697
698 - if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
699 - order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSo urce, status);
700 + if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
701 + order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collati onSource, status);
702 + }
703 }
704 }
705 - }
706 + } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_ LAST_HANGUL );
707 +
708 if(order == UCOL_NOT_FOUND) {
709 order = getImplicit(ch, collationSource);
710 }
711 @@ -1958,161 +1961,163 @@
712 else {
713 UChar ch = 0;
714
715 - /*
716 - Loop handles case when incremental normalize switches to or from the
717 - side buffer / original string, and we need to start again to get the
718 - next character.
719 - */
720 - for (;;) {
721 - if (data->flags & UCOL_ITER_HASLEN) {
722 - /*
723 - Normal path for strings when length is specified.
724 - Not in side buffer because it is always null terminated.
725 - */
726 - if (data->pos <= data->string) {
727 - /* End of the main source string */
728 - return UCOL_NO_MORE_CES;
729 - }
730 - data->pos --;
731 - ch = *data->pos;
732 - }
733 - // we are using an iterator to go back. Pray for us!
734 - else if (data->flags & UCOL_USE_ITERATOR) {
735 - UChar32 iterCh = data->iterator->previous(data->iterator);
736 - if(iterCh == U_SENTINEL) {
737 - return UCOL_NO_MORE_CES;
738 - } else {
739 - ch = (UChar)iterCh;
740 - }
741 - }
742 - else {
743 - data->pos --;
744 - ch = *data->pos;
745 - /* we are in the side buffer. */
746 - if (ch == 0) {
747 + do {
748 + /*
749 + Loop handles case when incremental normalize switches to or from th e
750 + side buffer / original string, and we need to start again to get th e
751 + next character.
752 + */
753 + for (;;) {
754 + if (data->flags & UCOL_ITER_HASLEN) {
755 /*
756 - At the start of the normalize side buffer.
757 - Go back to string.
758 - Because pointer points to the last accessed character,
759 - hence we have to increment it by one here.
760 + Normal path for strings when length is specified.
761 + Not in side buffer because it is always null terminated.
762 */
763 - data->flags = data->origFlags;
764 - data->offsetRepeatValue = 0;
765 -
766 - if (data->fcdPosition == NULL) {
767 - data->pos = data->string;
768 + if (data->pos <= data->string) {
769 + /* End of the main source string */
770 return UCOL_NO_MORE_CES;
771 }
772 - else {
773 - data->pos = data->fcdPosition + 1;
774 + data->pos --;
775 + ch = *data->pos;
776 + }
777 + // we are using an iterator to go back. Pray for us!
778 + else if (data->flags & UCOL_USE_ITERATOR) {
779 + UChar32 iterCh = data->iterator->previous(data->iterator);
780 + if(iterCh == U_SENTINEL) {
781 + return UCOL_NO_MORE_CES;
782 + } else {
783 + ch = (UChar)iterCh;
784 + }
785 + }
786 + else {
787 + data->pos --;
788 + ch = *data->pos;
789 + /* we are in the side buffer. */
790 + if (ch == 0) {
791 + /*
792 + At the start of the normalize side buffer.
793 + Go back to string.
794 + Because pointer points to the last accessed character,
795 + hence we have to increment it by one here.
796 + */
797 + data->flags = data->origFlags;
798 + data->offsetRepeatValue = 0;
799 +
800 + if (data->fcdPosition == NULL) {
801 + data->pos = data->string;
802 + return UCOL_NO_MORE_CES;
803 + }
804 + else {
805 + data->pos = data->fcdPosition + 1;
806 + }
807 +
808 + continue;
809 }
810 -
811 - continue;
812 }
813 - }
814
815 - if(data->flags&UCOL_HIRAGANA_Q) {
816 - if(ch>=0x3040 && ch<=0x309f) {
817 - data->flags |= UCOL_WAS_HIRAGANA;
818 - } else {
819 - data->flags &= ~UCOL_WAS_HIRAGANA;
820 - }
821 - }
822 + if(data->flags&UCOL_HIRAGANA_Q) {
823 + if(ch>=0x3040 && ch<=0x309f) {
824 + data->flags |= UCOL_WAS_HIRAGANA;
825 + } else {
826 + data->flags &= ~UCOL_WAS_HIRAGANA;
827 + }
828 + }
829
830 - /*
831 - * got a character to determine if there's fcd and/or normalization
832 - * stuff to do.
833 - * if the current character is not fcd.
834 - * if current character is at the start of the string
835 - * Trailing combining class == 0.
836 - * Note if pos is in the writablebuffer, norm is always 0
837 - */
838 - if (ch < ZERO_CC_LIMIT_ ||
839 - // this should propel us out of the loop in the iterator case
840 - (data->flags & UCOL_ITER_NORM) == 0 ||
841 - (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
842 - || data->string == data->pos) {
843 - break;
844 - }
845 -
846 - if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
847 - /* if next character is FCD */
848 - if (data->pos == data->string) {
849 - /* First char of string is always OK for FCD check */
850 + /*
851 + * got a character to determine if there's fcd and/or normalization
852 + * stuff to do.
853 + * if the current character is not fcd.
854 + * if current character is at the start of the string
855 + * Trailing combining class == 0.
856 + * Note if pos is in the writablebuffer, norm is always 0
857 + */
858 + if (ch < ZERO_CC_LIMIT_ ||
859 + // this should propel us out of the loop in the iterator case
860 + (data->flags & UCOL_ITER_NORM) == 0 ||
861 + (data->fcdPosition != NULL && data->fcdPosition <= data->po s)
862 + || data->string == data->pos) {
863 break;
864 }
865
866 - /* Not first char of string, do the FCD fast test */
867 - if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
868 + if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
869 + /* if next character is FCD */
870 + if (data->pos == data->string) {
871 + /* First char of string is always OK for FCD check */
872 + break;
873 + }
874 +
875 + /* Not first char of string, do the FCD fast test */
876 + if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
877 + break;
878 + }
879 + }
880 +
881 + /* Need a more complete FCD check and possible normalization. */
882 + if (collPrevIterFCD(data)) {
883 + collPrevIterNormalize(data);
884 + }
885 +
886 + if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
887 + /* No normalization. Go ahead and process the char. */
888 break;
889 }
890 - }
891
892 - /* Need a more complete FCD check and possible normalization. */
893 - if (collPrevIterFCD(data)) {
894 - collPrevIterNormalize(data);
895 + /*
896 + Some normalization happened.
897 + Next loop picks up a char from the normalization buffer.
898 + */
899 }
900
901 - if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
902 - /* No normalization. Go ahead and process the char. */
903 - break;
904 - }
905 -
906 - /*
907 - Some normalization happened.
908 - Next loop picks up a char from the normalization buffer.
909 + /* attempt to handle contractions, after removal of the backwards
910 + contraction
911 */
912 - }
913 -
914 - /* attempt to handle contractions, after removal of the backwards
915 - contraction
916 - */
917 - if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
918 - result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data , status);
919 - } else {
920 - if (ch <= 0xFF) {
921 - result = coll->latinOneMapping[ch];
922 - }
923 - else {
924 - // Always use UCA for [3400..9FFF], [AC00..D7AF]
925 - // **** [FA0E..FA2F] ?? ****
926 - if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
927 - (ch >= 0x3400 && ch <= 0xD7AF)) {
928 - if (ch > 0x9FFF && ch < 0xAC00) {
929 - // between the two target ranges; do normal lookup
930 - // **** this range is YI, Modifier tone letters, ****
931 - // **** Latin-D, Syloti Nagari, Phagas-pa. ****
932 - // **** Latin-D might be tailored, so we need to ****
933 - // **** do the normal lookup for these guys. ****
934 - result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
935 + if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
936 + result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
937 + } else {
938 + if (ch <= 0xFF) {
939 + result = coll->latinOneMapping[ch];
940 + }
941 + else {
942 + // Always use UCA for [3400..9FFF], [AC00..D7AF]
943 + // **** [FA0E..FA2F] ?? ****
944 + if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
945 + (ch >= 0x3400 && ch <= 0xD7AF)) {
946 + if (ch > 0x9FFF && ch < 0xAC00) {
947 + // between the two target ranges; do normal lookup
948 + // **** this range is YI, Modifier tone letters, ****
949 + // **** Latin-D, Syloti Nagari, Phagas-pa. ****
950 + // **** Latin-D might be tailored, so we need to ****
951 + // **** do the normal lookup for these guys. ****
952 + result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch) ;
953 + } else {
954 + result = UCOL_NOT_FOUND;
955 + }
956 } else {
957 - result = UCOL_NOT_FOUND;
958 + result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
959 }
960 - } else {
961 - result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
962 }
963 - }
964 - if (result > UCOL_NOT_FOUND) {
965 - result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, stat us);
966 - }
967 - if (result == UCOL_NOT_FOUND) { // Not found in master list
968 - if (!isAtStartPrevIterate(data) &&
969 - ucol_contractionEndCP(ch, data->coll))
970 - {
971 - result = UCOL_CONTRACTION;
972 - } else {
973 - if(coll->UCA) {
974 - result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch) ;
975 + if (result > UCOL_NOT_FOUND) {
976 + result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
977 + }
978 + if (result == UCOL_NOT_FOUND) { // Not found in master list
979 + if (!isAtStartPrevIterate(data) &&
980 + ucol_contractionEndCP(ch, data->coll))
981 + {
982 + result = UCOL_CONTRACTION;
983 + } else {
984 + if(coll->UCA) {
985 + result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
986 + }
987 }
988 - }
989
990 - if (result > UCOL_NOT_FOUND) {
991 - if(coll->UCA) {
992 - result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, resul t, data, status);
993 + if (result > UCOL_NOT_FOUND) {
994 + if(coll->UCA) {
995 + result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, r esult, data, status);
996 + }
997 }
998 }
999 }
1000 - }
1001 + } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
1002
1003 if(result == UCOL_NOT_FOUND) {
1004 result = getPrevImplicit(ch, data);
1005 @@ -3193,6 +3198,7 @@
1006 // Since Hanguls pass the FCD check, it is
1007 // guaranteed that we won't be in
1008 // the normalization buffer if something like this happens
1009 +
1010 // However, if we are using a uchar iterator and normalization
1011 // is ON, the Hangul that lead us here is going to be in that
1012 // normalization buffer. Here we want to restore the uchar
1013 @@ -3201,6 +3207,7 @@
1014 source->flags = source->origFlags; // restore the iterator
1015 source->pos = NULL;
1016 }
1017 +
1018 // Move Jamos into normalization buffer
1019 UChar *buffer = source->writableBuffer.getBuffer(4);
1020 int32_t bufferLength;
1021 @@ -3214,8 +3221,9 @@
1022 }
1023 source->writableBuffer.releaseBuffer(bufferLength);
1024
1025 - source->fcdPosition = source->pos; // Indicate where to continue in main input string
1026 - // after exhausting the writableBuffer
1027 + // Indicate where to continue in main input string after exhausting the writableBuffer
1028 + source->fcdPosition = source->pos;
1029 +
1030 source->pos = source->writableBuffer.getTerminatedBuffer( );
1031 source->origFlags = source->flags;
1032 source->flags |= UCOL_ITER_INNORMBUF;
1033 @@ -3966,13 +3974,10 @@
1034 // Since Hanguls pass the FCD check, it is
1035 // guaranteed that we won't be in
1036 // the normalization buffer if something like this happens
1037 +
1038 // Move Jamos into normalization buffer
1039 - /*
1040 - Move the Jamos into the
1041 - normalization buffer
1042 - */
1043 UChar *tempbuffer = source->writableBuffer.getBuffer(5);
1044 - int32_t tempbufferLength;
1045 + int32_t tempbufferLength, jamoOffset;
1046 tempbuffer[0] = 0;
1047 tempbuffer[1] = (UChar)L;
1048 tempbuffer[2] = (UChar)V;
1049 @@ -3984,16 +3989,30 @@
1050 }
1051 source->writableBuffer.releaseBuffer(tempbufferLength);
1052
1053 - /*
1054 - Indicate where to continue in main input string after exhausting
1055 - the writableBuffer
1056 - */
1057 + // Indicate where to continue in main input string after exhausting the writableBuffer
1058 if (source->pos == source->string) {
1059 + jamoOffset = 0;
1060 source->fcdPosition = NULL;
1061 } else {
1062 + jamoOffset = source->pos - source->string;
1063 source->fcdPosition = source->pos-1;
1064 }
1065 +
1066 + // Append offsets for the additional chars
1067 + // (not the 0, and not the L whose offsets match the original Hangul)
1068 + int32_t jamoRemaining = tempbufferLength - 2;
1069 + jamoOffset++; // appended offsets should match end of original Hangul
1070 + while (jamoRemaining-- > 0) {
1071 + source->appendOffset(jamoOffset, *status);
1072 + }
1073
1074 + source->offsetRepeatValue = jamoOffset;
1075 +
1076 + source->offsetReturn = source->offsetStore - 1;
1077 + if (source->offsetReturn == source->offsetBuffer) {
1078 + source->offsetStore = source->offsetBuffer;
1079 + }
1080 +
1081 source->pos = source->writableBuffer.getTermi natedBuffer() + tempbufferLength;
1082 source->origFlags = source->flags;
1083 source->flags |= UCOL_ITER_INNORMBUF;
OLDNEW
« no previous file with comments | « icu52/patches/rtti.patch ('k') | icu52/patches/segmentation.patch » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698