Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(53)

Side by Side Diff: source/common/unorm_it.c

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/unorm_it.h ('k') | source/common/unormcmp.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: unorm_it.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jan21
14 * created by: Markus W. Scherer
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/uiter.h"
22 #include "unicode/unorm.h"
23 #include "unicode/utf.h"
24 #include "unorm_it.h"
25 #include "cmemory.h"
26
27 /* UNormIterator ------------------------------------------------------------ */
28
29 enum {
30 INITIAL_CAPACITY=100
31 };
32
33 struct UNormIterator {
34 UCharIterator api;
35 UCharIterator *iter;
36
37 /*
38 * chars and states either use the static buffers
39 * or are allocated in the same memory block
40 *
41 * They are parallel arrays with states[] holding the getState() values
42 * from normalization boundaries, and UITER_NO_STATE in between.
43 */
44 UChar *chars;
45 uint32_t *states;
46
47 /*
48 * api.start: first valid character & state in the arrays
49 * api.index: current position
50 * api.limit: one past the last valid character in chars[], but states[limit ] is valid
51 * capacity: length of allocated arrays
52 */
53 int32_t capacity;
54
55 /* the current iter->getState(), saved to avoid unnecessary setState() calls ; may not correspond to api->index! */
56 uint32_t state;
57
58 /* there are UChars available before start or after limit? */
59 UBool hasPrevious, hasNext, isStackAllocated;
60
61 UNormalizationMode mode;
62
63 UChar charsBuffer[INITIAL_CAPACITY];
64 uint32_t statesBuffer[INITIAL_CAPACITY+1]; /* one more than charsBuffer[]! * /
65 };
66
67 static void
68 initIndexes(UNormIterator *uni, UCharIterator *iter) {
69 /* do not pass api so that the compiler knows it's an alias pointer to uni i tself */
70 UCharIterator *api=&uni->api;
71
72 if(!iter->hasPrevious(iter)) {
73 /* set indexes to the beginning of the arrays */
74 api->start=api->index=api->limit=0;
75 uni->hasPrevious=FALSE;
76 uni->hasNext=iter->hasNext(iter);
77 } else if(!iter->hasNext(iter)) {
78 /* set indexes to the end of the arrays */
79 api->start=api->index=api->limit=uni->capacity;
80 uni->hasNext=FALSE;
81 uni->hasPrevious=iter->hasPrevious(iter);
82 } else {
83 /* set indexes into the middle of the arrays */
84 api->start=api->index=api->limit=uni->capacity/2;
85 uni->hasPrevious=uni->hasNext=TRUE;
86 }
87 }
88
89 static UBool
90 reallocArrays(UNormIterator *uni, int32_t capacity, UBool addAtStart) {
91 /* do not pass api so that the compiler knows it's an alias pointer to uni i tself */
92 UCharIterator *api=&uni->api;
93
94 uint32_t *states;
95 UChar *chars;
96 int32_t start, limit;
97
98 states=(uint32_t *)uprv_malloc((capacity+1)*4+capacity*2);
99 if(states==NULL) {
100 return FALSE;
101 }
102
103 chars=(UChar *)(states+(capacity+1));
104 uni->capacity=capacity;
105
106 start=api->start;
107 limit=api->limit;
108
109 if(addAtStart) {
110 /* copy old contents to the end of the new arrays */
111 int32_t delta;
112
113 delta=capacity-uni->capacity;
114 uprv_memcpy(states+delta+start, uni->states+start, (limit-start+1)*4);
115 uprv_memcpy(chars+delta+start, uni->chars+start, (limit-start)*4);
116
117 api->start=start+delta;
118 api->index+=delta;
119 api->limit=limit+delta;
120 } else {
121 /* copy old contents to the beginning of the new arrays */
122 uprv_memcpy(states+start, uni->states+start, (limit-start+1)*4);
123 uprv_memcpy(chars+start, uni->chars+start, (limit-start)*4);
124 }
125
126 uni->chars=chars;
127 uni->states=states;
128
129 return TRUE;
130 }
131
132 static void
133 moveContentsTowardStart(UCharIterator *api, UChar chars[], uint32_t states[], in t32_t delta) {
134 /* move array contents up to make room */
135 int32_t srcIndex, destIndex, limit;
136
137 limit=api->limit;
138 srcIndex=delta;
139 if(srcIndex>api->start) {
140 /* look for a position in the arrays with a known state */
141 while(srcIndex<limit && states[srcIndex]==UITER_NO_STATE) {
142 ++srcIndex;
143 }
144 }
145
146 /* now actually move the array contents */
147 api->start=destIndex=0;
148 while(srcIndex<limit) {
149 chars[destIndex]=chars[srcIndex];
150 states[destIndex++]=states[srcIndex++];
151 }
152
153 /* copy states[limit] as well! */
154 states[destIndex]=states[srcIndex];
155
156 api->limit=destIndex;
157 }
158
159 static void
160 moveContentsTowardEnd(UCharIterator *api, UChar chars[], uint32_t states[], int3 2_t delta) {
161 /* move array contents up to make room */
162 int32_t srcIndex, destIndex, start;
163
164 start=api->start;
165 destIndex=((UNormIterator *)api)->capacity;
166 srcIndex=destIndex-delta;
167 if(srcIndex<api->limit) {
168 /* look for a position in the arrays with a known state */
169 while(srcIndex>start && states[srcIndex]==UITER_NO_STATE) {
170 --srcIndex;
171 }
172 }
173
174 /* now actually move the array contents */
175 api->limit=destIndex;
176
177 /* copy states[limit] as well! */
178 states[destIndex]=states[srcIndex];
179
180 while(srcIndex>start) {
181 chars[--destIndex]=chars[--srcIndex];
182 states[destIndex]=states[srcIndex];
183 }
184
185 api->start=destIndex;
186 }
187
188 /* normalize forward from the limit, assume hasNext is true */
189 static UBool
190 readNext(UNormIterator *uni, UCharIterator *iter) {
191 /* do not pass api so that the compiler knows it's an alias pointer to uni i tself */
192 UCharIterator *api=&uni->api;
193
194 /* make capacity/4 room at the end of the arrays */
195 int32_t limit, capacity, room;
196 UErrorCode errorCode;
197
198 limit=api->limit;
199 capacity=uni->capacity;
200 room=capacity/4;
201 if(room>(capacity-limit)) {
202 /* move array contents to make room */
203 moveContentsTowardStart(api, uni->chars, uni->states, room);
204 api->index=limit=api->limit;
205 uni->hasPrevious=TRUE;
206 }
207
208 /* normalize starting from the limit position */
209 errorCode=U_ZERO_ERROR;
210 if(uni->state!=uni->states[limit]) {
211 uiter_setState(iter, uni->states[limit], &errorCode);
212 if(U_FAILURE(errorCode)) {
213 uni->state=UITER_NO_STATE;
214 uni->hasNext=FALSE;
215 return FALSE;
216 }
217 }
218
219 room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode);
220 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
221 if(room<=capacity) {
222 /* empty and re-use the arrays */
223 uni->states[0]=uni->states[limit];
224 api->start=api->index=api->limit=limit=0;
225 uni->hasPrevious=TRUE;
226 } else {
227 capacity+=room+100;
228 if(!reallocArrays(uni, capacity, FALSE)) {
229 uni->state=UITER_NO_STATE;
230 uni->hasNext=FALSE;
231 return FALSE;
232 }
233 limit=api->limit;
234 }
235
236 errorCode=U_ZERO_ERROR;
237 uiter_setState(iter, uni->states[limit], &errorCode);
238 room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TR UE, NULL, &errorCode);
239 }
240 if(U_FAILURE(errorCode) || room==0) {
241 uni->state=UITER_NO_STATE;
242 uni->hasNext=FALSE;
243 return FALSE;
244 }
245
246 /* room>0 */
247 ++limit; /* leave the known states[limit] alone */
248 for(--room; room>0; --room) {
249 /* set unknown states for all but the normalization boundaries */
250 uni->states[limit++]=UITER_NO_STATE;
251 }
252 uni->states[limit]=uni->state=uiter_getState(iter);
253 uni->hasNext=iter->hasNext(iter);
254 api->limit=limit;
255 return TRUE;
256 }
257
258 /* normalize backward from the start, assume hasPrevious is true */
259 static UBool
260 readPrevious(UNormIterator *uni, UCharIterator *iter) {
261 /* do not pass api so that the compiler knows it's an alias pointer to uni i tself */
262 UCharIterator *api=&uni->api;
263
264 /* make capacity/4 room at the start of the arrays */
265 int32_t start, capacity, room;
266 UErrorCode errorCode;
267
268 start=api->start;
269 capacity=uni->capacity;
270 room=capacity/4;
271 if(room>start) {
272 /* move array contents to make room */
273 moveContentsTowardEnd(api, uni->chars, uni->states, room);
274 api->index=start=api->start;
275 uni->hasNext=TRUE;
276 }
277
278 /* normalize ending at the start position */
279 errorCode=U_ZERO_ERROR;
280 if(uni->state!=uni->states[start]) {
281 uiter_setState(iter, uni->states[start], &errorCode);
282 if(U_FAILURE(errorCode)) {
283 uni->state=UITER_NO_STATE;
284 uni->hasPrevious=FALSE;
285 return FALSE;
286 }
287 }
288
289 room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &erro rCode);
290 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
291 if(room<=capacity) {
292 /* empty and re-use the arrays */
293 uni->states[capacity]=uni->states[start];
294 api->start=api->index=api->limit=start=capacity;
295 uni->hasNext=TRUE;
296 } else {
297 capacity+=room+100;
298 if(!reallocArrays(uni, capacity, TRUE)) {
299 uni->state=UITER_NO_STATE;
300 uni->hasPrevious=FALSE;
301 return FALSE;
302 }
303 start=api->start;
304 }
305
306 errorCode=U_ZERO_ERROR;
307 uiter_setState(iter, uni->states[start], &errorCode);
308 room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, & errorCode);
309 }
310 if(U_FAILURE(errorCode) || room==0) {
311 uni->state=UITER_NO_STATE;
312 uni->hasPrevious=FALSE;
313 return FALSE;
314 }
315
316 /* room>0 */
317 do {
318 /* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */
319 uni->chars[--start]=uni->chars[--room];
320 /* set unknown states for all but the normalization boundaries */
321 uni->states[start]=UITER_NO_STATE;
322 } while(room>0);
323 uni->states[start]=uni->state=uiter_getState(iter);
324 uni->hasPrevious=iter->hasPrevious(iter);
325 api->start=start;
326 return TRUE;
327 }
328
329 /* Iterator runtime API functions ------------------------------------------- */
330
331 static int32_t U_CALLCONV
332 unormIteratorGetIndex(UCharIterator *api, UCharIteratorOrigin origin) {
333 switch(origin) {
334 case UITER_ZERO:
335 case UITER_START:
336 return 0;
337 case UITER_CURRENT:
338 case UITER_LIMIT:
339 case UITER_LENGTH:
340 return UITER_UNKNOWN_INDEX;
341 default:
342 /* not a valid origin */
343 /* Should never get here! */
344 return -1;
345 }
346 }
347
348 static int32_t U_CALLCONV
349 unormIteratorMove(UCharIterator *api, int32_t delta, UCharIteratorOrigin origin) {
350 UNormIterator *uni=(UNormIterator *)api;
351 UCharIterator *iter=uni->iter;
352 int32_t pos;
353
354 switch(origin) {
355 case UITER_ZERO:
356 case UITER_START:
357 /* restart from the beginning */
358 if(uni->hasPrevious) {
359 iter->move(iter, 0, UITER_START);
360 api->start=api->index=api->limit=0;
361 uni->states[api->limit]=uni->state=uiter_getState(iter);
362 uni->hasPrevious=FALSE;
363 uni->hasNext=iter->hasNext(iter);
364 } else {
365 /* we already have the beginning of the normalized text */
366 api->index=api->start;
367 }
368 break;
369 case UITER_CURRENT:
370 break;
371 case UITER_LIMIT:
372 case UITER_LENGTH:
373 /* restart from the end */
374 if(uni->hasNext) {
375 iter->move(iter, 0, UITER_LIMIT);
376 api->start=api->index=api->limit=uni->capacity;
377 uni->states[api->limit]=uni->state=uiter_getState(iter);
378 uni->hasPrevious=iter->hasPrevious(iter);
379 uni->hasNext=FALSE;
380 } else {
381 /* we already have the end of the normalized text */
382 api->index=api->limit;
383 }
384 break;
385 default:
386 return -1; /* Error */
387 }
388
389 /* move relative to the current position by delta normalized UChars */
390 if(delta==0) {
391 /* nothing to do */
392 } else if(delta>0) {
393 /* go forward until the requested position is in the buffer */
394 for(;;) {
395 pos=api->index+delta; /* requested position */
396 delta=pos-api->limit; /* remainder beyond buffered text */
397 if(delta<=0) {
398 api->index=pos; /* position reached */
399 break;
400 }
401
402 /* go to end of buffer and normalize further */
403 api->index=api->limit;
404 if(!uni->hasNext || !readNext(uni, iter)) {
405 break; /* reached end of text */
406 }
407 }
408 } else /* delta<0 */ {
409 /* go backward until the requested position is in the buffer */
410 for(;;) {
411 pos=api->index+delta; /* requested position */
412 delta=pos-api->start; /* remainder beyond buffered text */
413 if(delta>=0) {
414 api->index=pos; /* position reached */
415 break;
416 }
417
418 /* go to start of buffer and normalize further */
419 api->index=api->start;
420 if(!uni->hasPrevious || !readPrevious(uni, iter)) {
421 break; /* reached start of text */
422 }
423 }
424 }
425
426 if(api->index==api->start && !uni->hasPrevious) {
427 return 0;
428 } else {
429 return UITER_UNKNOWN_INDEX;
430 }
431 }
432
433 static UBool U_CALLCONV
434 unormIteratorHasNext(UCharIterator *api) {
435 return api->index<api->limit || ((UNormIterator *)api)->hasNext;
436 }
437
438 static UBool U_CALLCONV
439 unormIteratorHasPrevious(UCharIterator *api) {
440 return api->index>api->start || ((UNormIterator *)api)->hasPrevious;
441 }
442
443 static UChar32 U_CALLCONV
444 unormIteratorCurrent(UCharIterator *api) {
445 UNormIterator *uni=(UNormIterator *)api;
446
447 if( api->index<api->limit ||
448 (uni->hasNext && readNext(uni, uni->iter))
449 ) {
450 return uni->chars[api->index];
451 } else {
452 return U_SENTINEL;
453 }
454 }
455
456 static UChar32 U_CALLCONV
457 unormIteratorNext(UCharIterator *api) {
458 UNormIterator *uni=(UNormIterator *)api;
459
460 if( api->index<api->limit ||
461 (uni->hasNext && readNext(uni, uni->iter))
462 ) {
463 return uni->chars[api->index++];
464 } else {
465 return U_SENTINEL;
466 }
467 }
468
469 static UChar32 U_CALLCONV
470 unormIteratorPrevious(UCharIterator *api) {
471 UNormIterator *uni=(UNormIterator *)api;
472
473 if( api->index>api->start ||
474 (uni->hasPrevious && readPrevious(uni, uni->iter))
475 ) {
476 return uni->chars[--api->index];
477 } else {
478 return U_SENTINEL;
479 }
480 }
481
482 static uint32_t U_CALLCONV
483 unormIteratorGetState(const UCharIterator *api) {
484 /* not uni->state because that may not be at api->index */
485 return ((UNormIterator *)api)->states[api->index];
486 }
487
488 static void U_CALLCONV
489 unormIteratorSetState(UCharIterator *api, uint32_t state, UErrorCode *pErrorCode ) {
490 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
491 /* do nothing */
492 } else if(api==NULL) {
493 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
494 } else if(state==UITER_NO_STATE) {
495 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
496 } else {
497 UNormIterator *uni=(UNormIterator *)api;
498 UCharIterator *iter=((UNormIterator *)api)->iter;
499 if(state!=uni->state) {
500 uni->state=state;
501 uiter_setState(iter, state, pErrorCode);
502 }
503
504 /*
505 * Try shortcuts: If the requested state is in the array contents
506 * then just set the index there.
507 *
508 * We assume that the state is unique per position!
509 */
510 if(state==uni->states[api->index]) {
511 return;
512 } else if(state==uni->states[api->limit]) {
513 api->index=api->limit;
514 return;
515 } else {
516 /* search for the index with this state */
517 int32_t i;
518
519 for(i=api->start; i<api->limit; ++i) {
520 if(state==uni->states[i]) {
521 api->index=i;
522 return;
523 }
524 }
525 }
526
527 /* there is no array index for this state, reset for fresh contents */
528 initIndexes((UNormIterator *)api, iter);
529 uni->states[api->limit]=state;
530 }
531 }
532
533 static const UCharIterator unormIterator={
534 NULL, 0, 0, 0, 0, 0,
535 unormIteratorGetIndex,
536 unormIteratorMove,
537 unormIteratorHasNext,
538 unormIteratorHasPrevious,
539 unormIteratorCurrent,
540 unormIteratorNext,
541 unormIteratorPrevious,
542 NULL,
543 unormIteratorGetState,
544 unormIteratorSetState
545 };
546
547 /* Setup functions ---------------------------------------------------------- */
548
549 U_CAPI UNormIterator * U_EXPORT2
550 unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode) {
551 UNormIterator *uni;
552
553 /* argument checking */
554 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
555 return NULL;
556 }
557
558 /* allocate */
559 uni=NULL;
560 if(stackMem!=NULL && stackMemSize>=sizeof(UNormIterator)) {
561 if(U_ALIGNMENT_OFFSET(stackMem)==0) {
562 /* already aligned */
563 uni=(UNormIterator *)stackMem;
564 } else {
565 int32_t align=(int32_t)U_ALIGNMENT_OFFSET_UP(stackMem);
566 if((stackMemSize-=align)>=(int32_t)sizeof(UNormIterator)) {
567 /* needs alignment */
568 uni=(UNormIterator *)((char *)stackMem+align);
569 }
570 }
571 /* else does not fit */
572 }
573
574 if(uni!=NULL) {
575 uni->isStackAllocated=TRUE;
576 } else {
577 uni=(UNormIterator *)uprv_malloc(sizeof(UNormIterator));
578 if(uni==NULL) {
579 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
580 return NULL;
581 }
582 uni->isStackAllocated=FALSE;
583 }
584
585 /*
586 * initialize
587 * do not memset because that would unnecessarily initialize the arrays
588 */
589 uni->iter=NULL;
590 uni->chars=uni->charsBuffer;
591 uni->states=uni->statesBuffer;
592 uni->capacity=INITIAL_CAPACITY;
593 uni->state=UITER_NO_STATE;
594 uni->hasPrevious=uni->hasNext=FALSE;
595 uni->mode=UNORM_NONE;
596
597 /* set a no-op iterator into the api */
598 uiter_setString(&uni->api, NULL, 0);
599 return uni;
600 }
601
602 U_CAPI void U_EXPORT2
603 unorm_closeIter(UNormIterator *uni) {
604 if(uni!=NULL) {
605 if(uni->states!=uni->statesBuffer) {
606 /* chars and states are allocated in the same memory block */
607 uprv_free(uni->states);
608 }
609 if(!uni->isStackAllocated) {
610 uprv_free(uni);
611 }
612 }
613 }
614
615 U_CAPI UCharIterator * U_EXPORT2
616 unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode) {
617 /* argument checking */
618 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
619 return NULL;
620 }
621 if(uni==NULL) {
622 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
623 return NULL;
624 }
625 if( iter==NULL || iter->getState==NULL || iter->setState==NULL ||
626 mode<UNORM_NONE || UNORM_MODE_COUNT<=mode
627 ) {
628 /* set a no-op iterator into the api */
629 uiter_setString(&uni->api, NULL, 0);
630 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
631 return NULL;
632 }
633
634 /* set the iterator and initialize */
635 uprv_memcpy(&uni->api, &unormIterator, sizeof(unormIterator));
636
637 uni->iter=iter;
638 uni->mode=mode;
639
640 initIndexes(uni, iter);
641 uni->states[uni->api.limit]=uni->state=uiter_getState(iter);
642
643 return &uni->api;
644 }
645
646 #endif /* uconfig.h switches */
OLDNEW
« no previous file with comments | « source/common/unorm_it.h ('k') | source/common/unormcmp.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698