Index: source/common/unorm_it.c |
diff --git a/source/common/unorm_it.c b/source/common/unorm_it.c |
deleted file mode 100644 |
index 3adb1ce07ed858802b6e4c823775819df664118b..0000000000000000000000000000000000000000 |
--- a/source/common/unorm_it.c |
+++ /dev/null |
@@ -1,646 +0,0 @@ |
-/* |
-******************************************************************************* |
-* |
-* Copyright (C) 2003-2011, International Business Machines |
-* Corporation and others. All Rights Reserved. |
-* |
-******************************************************************************* |
-* file name: unorm_it.c |
-* encoding: US-ASCII |
-* tab size: 8 (not used) |
-* indentation:4 |
-* |
-* created on: 2003jan21 |
-* created by: Markus W. Scherer |
-*/ |
- |
-#include "unicode/utypes.h" |
- |
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION |
- |
-#include "unicode/uiter.h" |
-#include "unicode/unorm.h" |
-#include "unicode/utf.h" |
-#include "unorm_it.h" |
-#include "cmemory.h" |
- |
-/* UNormIterator ------------------------------------------------------------ */ |
- |
-/* Length, in UChars, of the buffers embedded in UNormIterator (charsBuffer[]; statesBuffer[] has one more entry). */ |
-enum { |
- INITIAL_CAPACITY=100 |
-}; |
- |
-/* |
- * State of a normalizing iterator: a window of normalized text that is |
- * buffered from an underlying UCharIterator. |
- */ |
-struct UNormIterator { |
- /* |
- * iterator interface returned to callers by unorm_setIter(); |
- * the callbacks cast their UCharIterator * argument back to UNormIterator *, |
- * so this must remain the first member |
- */ |
- UCharIterator api; |
- /* source iterator stored by unorm_setIter(); this file never frees it */ |
- UCharIterator *iter; |
- |
- /* |
- * chars and states either use the static buffers |
- * or are allocated in the same memory block |
- * (states first, chars directly behind them; see reallocArrays()) |
- * |
- * They are parallel arrays with states[] holding the getState() values |
- * from normalization boundaries, and UITER_NO_STATE in between. |
- */ |
- UChar *chars; |
- uint32_t *states; |
- |
- /* |
- * api.start: first valid character & state in the arrays |
- * api.index: current position |
- * api.limit: one past the last valid character in chars[], but states[limit] is valid |
- * capacity: length of allocated arrays |
- */ |
- int32_t capacity; |
- |
- /* the current iter->getState(), saved to avoid unnecessary setState() calls; may not correspond to api.index! */ |
- uint32_t state; |
- |
- /* |
- * hasPrevious/hasNext: there are UChars available before start or after limit? |
- * isStackAllocated: the struct lives in caller-provided memory, |
- * so unorm_closeIter() must not free it |
- */ |
- UBool hasPrevious, hasNext, isStackAllocated; |
- |
- /* normalization mode handed to unorm_next() and unorm_previous() */ |
- UNormalizationMode mode; |
- |
- /* embedded buffers that chars/states point to after unorm_openIter() */ |
- UChar charsBuffer[INITIAL_CAPACITY]; |
- uint32_t statesBuffer[INITIAL_CAPACITY+1]; /* one more than charsBuffer[]! */ |
-}; |
- |
-/* |
- * Reset the buffered window to empty and record in uni->hasPrevious and |
- * uni->hasNext what the source iterator reports about its current position. |
- * The empty window is placed at array index 0 when the source is at its |
- * beginning, at index capacity when the source is at its end, and at |
- * capacity/2 otherwise. |
- */ |
-static void |
-initIndexes(UNormIterator *uni, UCharIterator *iter) { |
-    /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ |
-    UCharIterator *window=&uni->api; |
- |
-    if(!iter->hasPrevious(iter)) { |
-        /* source is at its beginning: empty window at the front of the arrays */ |
-        window->limit=0; |
-        window->index=0; |
-        window->start=0; |
-        uni->hasPrevious=FALSE; |
-        uni->hasNext=iter->hasNext(iter); |
-        return; |
-    } |
- |
-    if(!iter->hasNext(iter)) { |
-        /* source is at its end: empty window at the back of the arrays */ |
-        window->limit=uni->capacity; |
-        window->index=window->limit; |
-        window->start=window->limit; |
-        uni->hasNext=FALSE; |
-        uni->hasPrevious=iter->hasPrevious(iter); |
-        return; |
-    } |
- |
-    /* text on both sides: empty window in the middle of the arrays */ |
-    window->limit=uni->capacity/2; |
-    window->index=window->limit; |
-    window->start=window->limit; |
-    uni->hasNext=TRUE; |
-    uni->hasPrevious=TRUE; |
-} |
- |
-/* |
- * Replace chars[] and states[] with one newly allocated block that holds |
- * capacity+1 states followed by capacity UChars, and copy the valid window |
- * [api.start, api.limit) plus states[api.limit] into it. |
- * |
- * uni:        iterator whose arrays are replaced |
- * capacity:   new array length; must not be smaller than the current capacity |
- * addAtStart: TRUE puts the added space in front of the old contents: they are |
- *             shifted toward the end by (capacity - old capacity) and |
- *             api.start/index/limit are shifted with them; |
- *             FALSE keeps all indexes, the added space follows the contents |
- * |
- * Returns TRUE on success. Returns FALSE and leaves uni unchanged if capacity |
- * is smaller than the current capacity or if the allocation fails. |
- */ |
-static UBool |
-reallocArrays(UNormIterator *uni, int32_t capacity, UBool addAtStart) { |
-    /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ |
-    UCharIterator *api=&uni->api; |
- |
-    uint32_t *oldStates, *states; |
-    UChar *oldChars, *chars; |
-    int32_t start, limit, delta; |
- |
-    /* a smaller (or overflowed, negative) capacity would make the copies below overrun */ |
-    if(capacity<uni->capacity) { |
-        return FALSE; |
-    } |
- |
-    states=(uint32_t *)uprv_malloc((capacity+1)*sizeof(uint32_t)+capacity*sizeof(UChar)); |
-    if(states==NULL) { |
-        return FALSE; |
-    } |
-    chars=(UChar *)(states+(capacity+1)); |
- |
-    oldStates=uni->states; |
-    oldChars=uni->chars; |
-    start=api->start; |
-    limit=api->limit; |
- |
-    /* the shift must be computed from the old capacity, before it is overwritten */ |
-    delta= addAtStart ? capacity-uni->capacity : 0; |
-    uni->capacity=capacity; |
- |
-    /* states[] has one entry more than chars[]: states[limit] is valid */ |
-    uprv_memcpy(states+delta+start, oldStates+start, (limit-start+1)*sizeof(uint32_t)); |
-    uprv_memcpy(chars+delta+start, oldChars+start, (limit-start)*sizeof(UChar)); |
- |
-    api->start=start+delta; |
-    api->index+=delta; |
-    api->limit=limit+delta; |
- |
-    uni->chars=chars; |
-    uni->states=states; |
- |
-    /* chars and states share one heap block; the embedded buffers are not heap memory */ |
-    if(oldStates!=uni->statesBuffer) { |
-        uprv_free(oldStates); |
-    } |
- |
-    return TRUE; |
-} |
- |
-/* |
- * Slide the buffered window down to array index 0 so that free space opens |
- * up behind api->limit. |
- * |
- * api:    iterator whose start and limit are rewritten (index is left to the caller) |
- * chars:  the character array to shift |
- * states: the parallel state array; states[limit] is moved along with the text |
- * delta:  number of leading array entries that may be given up |
- * |
- * If the window starts below delta, its leading entries are dropped up to the |
- * next entry with a known state (or up to limit if there is none). |
- * Otherwise nothing is dropped and the whole window is moved. |
- */ |
-static void |
-moveContentsTowardStart(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) { |
-    int32_t srcIndex, destIndex, limit; |
- |
-    limit=api->limit; |
-    srcIndex=delta; |
-    if(srcIndex>api->start) { |
-        /* look for a position in the arrays with a known state */ |
-        while(srcIndex<limit && states[srcIndex]==UITER_NO_STATE) { |
-            ++srcIndex; |
-        } |
-    } else { |
-        /* |
-         * entries below start are not valid and must not be copied into |
-         * the window; there are at least delta of them, so moving the |
-         * window as a whole already frees enough room |
-         */ |
-        srcIndex=api->start; |
-    } |
- |
-    /* now actually move the array contents toward index 0 */ |
-    api->start=destIndex=0; |
-    while(srcIndex<limit) { |
-        chars[destIndex]=chars[srcIndex]; |
-        states[destIndex++]=states[srcIndex++]; |
-    } |
- |
-    /* copy states[limit] as well! */ |
-    states[destIndex]=states[srcIndex]; |
- |
-    api->limit=destIndex; |
-} |
- |
-/* |
- * Slide the buffered window up to the end of the arrays so that free space |
- * opens up in front of api->start. |
- * |
- * api:    iterator whose start and limit are rewritten (index is left to the caller); |
- *         it is cast to UNormIterator * to read the array capacity |
- * chars:  the character array to shift |
- * states: the parallel state array; the state at the window's end is moved to states[capacity] |
- * delta:  number of trailing array entries that may be given up |
- * |
- * If the window ends above capacity-delta, its trailing entries are dropped |
- * back to the previous entry with a known state (or back to start if there |
- * is none). Otherwise nothing is dropped and the whole window is moved. |
- */ |
-static void |
-moveContentsTowardEnd(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) { |
-    int32_t srcIndex, destIndex, start; |
- |
-    start=api->start; |
-    destIndex=((UNormIterator *)api)->capacity; |
-    srcIndex=destIndex-delta; |
-    if(srcIndex<api->limit) { |
-        /* look for a position in the arrays with a known state */ |
-        while(srcIndex>start && states[srcIndex]==UITER_NO_STATE) { |
-            --srcIndex; |
-        } |
-    } else { |
-        /* |
-         * entries above limit are not valid and must not be copied into |
-         * the window; there are at least delta of them, so moving the |
-         * window as a whole already frees enough room |
-         */ |
-        srcIndex=api->limit; |
-    } |
- |
-    /* now actually move the array contents toward the end */ |
-    api->limit=destIndex; |
- |
-    /* copy states[limit] as well! */ |
-    states[destIndex]=states[srcIndex]; |
- |
-    /* copy back to front because source and destination ranges may overlap */ |
-    while(srcIndex>start) { |
-        chars[--destIndex]=chars[--srcIndex]; |
-        states[destIndex]=states[srcIndex]; |
-    } |
- |
-    api->start=destIndex; |
-} |
- |
-/* |
- * Normalize forward from the limit and append the result to the window. |
- * The caller must have checked that uni->hasNext is true. |
- * |
- * uni:  the normalizing iterator whose arrays are extended |
- * iter: the source iterator to read from |
- * |
- * Returns TRUE if at least one UChar was appended; api.limit, states[limit], |
- * uni->state and uni->hasNext are then updated. |
- * Returns FALSE, with uni->state=UITER_NO_STATE and uni->hasNext=FALSE, |
- * if setting the source state fails, growing the arrays fails, |
- * unorm_next() reports an error, or it produces no output. |
- */ |
-static UBool |
-readNext(UNormIterator *uni, UCharIterator *iter) { |
- /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ |
- UCharIterator *api=&uni->api; |
- |
- /* make capacity/4 room at the end of the arrays */ |
- int32_t limit, capacity, room; |
- UErrorCode errorCode; |
- |
- limit=api->limit; |
- capacity=uni->capacity; |
- room=capacity/4; |
- if(room>(capacity-limit)) { |
- /* move array contents to make room */ |
- moveContentsTowardStart(api, uni->chars, uni->states, room); |
- /* the window moved: re-read limit and put the current position at the new limit */ |
- api->index=limit=api->limit; |
- uni->hasPrevious=TRUE; |
- } |
- |
- /* normalize starting from the limit position */ |
- errorCode=U_ZERO_ERROR; |
- /* reposition the source only if it is not already at the state recorded for limit */ |
- if(uni->state!=uni->states[limit]) { |
- uiter_setState(iter, uni->states[limit], &errorCode); |
- if(U_FAILURE(errorCode)) { |
- uni->state=UITER_NO_STATE; |
- uni->hasNext=FALSE; |
- return FALSE; |
- } |
- } |
- |
- /* from here on, room holds the length returned by unorm_next() */ |
- room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode); |
- if(errorCode==U_BUFFER_OVERFLOW_ERROR) { |
- if(room<=capacity) { |
- /* empty and re-use the arrays */ |
- uni->states[0]=uni->states[limit]; |
- api->start=api->index=api->limit=limit=0; |
- uni->hasPrevious=TRUE; |
- } else { |
- /* the result does not fit even into empty arrays: grow them, adding space at the end */ |
- capacity+=room+100; |
- if(!reallocArrays(uni, capacity, FALSE)) { |
- uni->state=UITER_NO_STATE; |
- uni->hasNext=FALSE; |
- return FALSE; |
- } |
- limit=api->limit; |
- } |
- |
- /* go back to the state at limit and try again; errors are checked right below */ |
- errorCode=U_ZERO_ERROR; |
- uiter_setState(iter, uni->states[limit], &errorCode); |
- room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode); |
- } |
- if(U_FAILURE(errorCode) || room==0) { |
- uni->state=UITER_NO_STATE; |
- uni->hasNext=FALSE; |
- return FALSE; |
- } |
- |
- /* room>0 */ |
- ++limit; /* leave the known states[limit] alone */ |
- for(--room; room>0; --room) { |
- /* set unknown states for all but the normalization boundaries */ |
- uni->states[limit++]=UITER_NO_STATE; |
- } |
- /* the new limit is a boundary again: record the source state there and cache it */ |
- uni->states[limit]=uni->state=uiter_getState(iter); |
- uni->hasNext=iter->hasNext(iter); |
- api->limit=limit; |
- return TRUE; |
-} |
- |
-/* |
- * Normalize backward from the start and prepend the result to the window. |
- * The caller must have checked that uni->hasPrevious is true. |
- * |
- * uni:  the normalizing iterator whose arrays are extended |
- * iter: the source iterator to read from |
- * |
- * Returns TRUE if at least one UChar was prepended; api.start, states[start], |
- * uni->state and uni->hasPrevious are then updated. |
- * Returns FALSE, with uni->state=UITER_NO_STATE and uni->hasPrevious=FALSE, |
- * if setting the source state fails, growing the arrays fails, |
- * unorm_previous() reports an error, or it produces no output. |
- */ |
-static UBool |
-readPrevious(UNormIterator *uni, UCharIterator *iter) { |
- /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ |
- UCharIterator *api=&uni->api; |
- |
- /* make capacity/4 room at the start of the arrays */ |
- int32_t start, capacity, room; |
- UErrorCode errorCode; |
- |
- start=api->start; |
- capacity=uni->capacity; |
- room=capacity/4; |
- if(room>start) { |
- /* move array contents to make room */ |
- moveContentsTowardEnd(api, uni->chars, uni->states, room); |
- /* the window moved: re-read start and put the current position at the new start */ |
- api->index=start=api->start; |
- uni->hasNext=TRUE; |
- } |
- |
- /* normalize ending at the start position */ |
- errorCode=U_ZERO_ERROR; |
- /* reposition the source only if it is not already at the state recorded for start */ |
- if(uni->state!=uni->states[start]) { |
- uiter_setState(iter, uni->states[start], &errorCode); |
- if(U_FAILURE(errorCode)) { |
- uni->state=UITER_NO_STATE; |
- uni->hasPrevious=FALSE; |
- return FALSE; |
- } |
- } |
- |
- /* |
- * the output is written to the unused entries chars[0..start[ and moved |
- * up against the window further below; |
- * from here on, room holds the length returned by unorm_previous() |
- */ |
- room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode); |
- if(errorCode==U_BUFFER_OVERFLOW_ERROR) { |
- if(room<=capacity) { |
- /* empty and re-use the arrays */ |
- uni->states[capacity]=uni->states[start]; |
- api->start=api->index=api->limit=start=capacity; |
- uni->hasNext=TRUE; |
- } else { |
- /* the result does not fit even into empty arrays: grow them, adding space at the start */ |
- capacity+=room+100; |
- if(!reallocArrays(uni, capacity, TRUE)) { |
- uni->state=UITER_NO_STATE; |
- uni->hasPrevious=FALSE; |
- return FALSE; |
- } |
- start=api->start; |
- } |
- |
- /* go back to the state at start and try again; errors are checked right below */ |
- errorCode=U_ZERO_ERROR; |
- uiter_setState(iter, uni->states[start], &errorCode); |
- room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode); |
- } |
- if(U_FAILURE(errorCode) || room==0) { |
- uni->state=UITER_NO_STATE; |
- uni->hasPrevious=FALSE; |
- return FALSE; |
- } |
- |
- /* room>0 */ |
- /* copy from the back so that overlapping source and destination ranges are safe */ |
- do { |
- /* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */ |
- uni->chars[--start]=uni->chars[--room]; |
- /* set unknown states for all but the normalization boundaries */ |
- uni->states[start]=UITER_NO_STATE; |
- } while(room>0); |
- /* the new start is a boundary: overwrite its state with the source state and cache it */ |
- uni->states[start]=uni->state=uiter_getState(iter); |
- uni->hasPrevious=iter->hasPrevious(iter); |
- api->start=start; |
- return TRUE; |
-} |
- |
-/* Iterator runtime API functions ------------------------------------------- */ |
- |
-/* |
- * getIndex() callback. |
- * Only the beginning has a known index (0); the current position, the |
- * limit and the length are reported as UITER_UNKNOWN_INDEX. |
- * Returns -1 for an origin value that is not handled here. |
- * The api argument is not used. |
- */ |
-static int32_t U_CALLCONV |
-unormIteratorGetIndex(UCharIterator *api, UCharIteratorOrigin origin) { |
-    if(origin==UITER_ZERO || origin==UITER_START) { |
-        return 0; |
-    } |
-    if(origin==UITER_CURRENT || origin==UITER_LIMIT || origin==UITER_LENGTH) { |
-        return UITER_UNKNOWN_INDEX; |
-    } |
- |
-    /* not a valid origin */ |
-    /* Should never get here! */ |
-    return -1; |
-} |
- |
-/* |
- * move() callback: reposition relative to origin, then by delta normalized UChars. |
- * |
- * api:    the UNormIterator, passed as its embedded UCharIterator |
- * delta:  number of normalized UChars to move; negative moves backward |
- * origin: UITER_ZERO/UITER_START start from the beginning of the text, |
- *         UITER_LIMIT/UITER_LENGTH from its end, UITER_CURRENT from api->index |
- * |
- * Movement stops early when the start or end of the text is reached or |
- * when reading more text fails. |
- * |
- * Returns 0 if the resulting position is known to be the beginning of the |
- * text, -1 for an origin value that is not handled, and UITER_UNKNOWN_INDEX otherwise. |
- */ |
-static int32_t U_CALLCONV |
-unormIteratorMove(UCharIterator *api, int32_t delta, UCharIteratorOrigin origin) { |
- UNormIterator *uni=(UNormIterator *)api; |
- UCharIterator *iter=uni->iter; |
- int32_t pos; |
- |
- switch(origin) { |
- case UITER_ZERO: |
- case UITER_START: |
- /* restart from the beginning */ |
- if(uni->hasPrevious) { |
- /* the window does not reach the beginning: discard it and rewind the source */ |
- iter->move(iter, 0, UITER_START); |
- api->start=api->index=api->limit=0; |
- uni->states[api->limit]=uni->state=uiter_getState(iter); |
- uni->hasPrevious=FALSE; |
- uni->hasNext=iter->hasNext(iter); |
- } else { |
- /* we already have the beginning of the normalized text */ |
- api->index=api->start; |
- } |
- break; |
- case UITER_CURRENT: |
- /* keep api->index as the base for the relative move */ |
- break; |
- case UITER_LIMIT: |
- case UITER_LENGTH: |
- /* restart from the end */ |
- if(uni->hasNext) { |
- /* the window does not reach the end: discard it and move the source to its end */ |
- iter->move(iter, 0, UITER_LIMIT); |
- api->start=api->index=api->limit=uni->capacity; |
- uni->states[api->limit]=uni->state=uiter_getState(iter); |
- uni->hasPrevious=iter->hasPrevious(iter); |
- uni->hasNext=FALSE; |
- } else { |
- /* we already have the end of the normalized text */ |
- api->index=api->limit; |
- } |
- break; |
- default: |
- return -1; /* Error */ |
- } |
- |
- /* move relative to the current position by delta normalized UChars */ |
- if(delta==0) { |
- /* nothing to do */ |
- } else if(delta>0) { |
- /* go forward until the requested position is in the buffer */ |
- for(;;) { |
- pos=api->index+delta; /* requested position */ |
- delta=pos-api->limit; /* remainder beyond buffered text */ |
- if(delta<=0) { |
- api->index=pos; /* position reached */ |
- break; |
- } |
- |
- /* go to end of buffer and normalize further */ |
- api->index=api->limit; |
- /* readNext() may move the window; the next round recomputes pos from the updated index */ |
- if(!uni->hasNext || !readNext(uni, iter)) { |
- break; /* reached end of text */ |
- } |
- } |
- } else /* delta<0 */ { |
- /* go backward until the requested position is in the buffer */ |
- for(;;) { |
- pos=api->index+delta; /* requested position */ |
- delta=pos-api->start; /* remainder beyond buffered text */ |
- if(delta>=0) { |
- api->index=pos; /* position reached */ |
- break; |
- } |
- |
- /* go to start of buffer and normalize further */ |
- api->index=api->start; |
- /* readPrevious() may move the window; the next round recomputes pos from the updated index */ |
- if(!uni->hasPrevious || !readPrevious(uni, iter)) { |
- break; /* reached start of text */ |
- } |
- } |
- } |
- |
- /* the index is only known when the position is at the very beginning of the text */ |
- if(api->index==api->start && !uni->hasPrevious) { |
- return 0; |
- } else { |
- return UITER_UNKNOWN_INDEX; |
- } |
-} |
- |
-/* |
- * hasNext() callback: TRUE if buffered text follows the current position |
- * or more text is available after the window. |
- */ |
-static UBool U_CALLCONV |
-unormIteratorHasNext(UCharIterator *api) { |
-    const UNormIterator *uni=(const UNormIterator *)api; |
- |
-    if(api->index<api->limit) { |
-        return 1; |
-    } |
-    return uni->hasNext!=0; |
-} |
- |
-/* |
- * hasPrevious() callback: TRUE if buffered text precedes the current |
- * position or more text is available before the window. |
- */ |
-static UBool U_CALLCONV |
-unormIteratorHasPrevious(UCharIterator *api) { |
-    const UNormIterator *uni=(const UNormIterator *)api; |
- |
-    if(api->index>api->start) { |
-        return 1; |
-    } |
-    return uni->hasPrevious!=0; |
-} |
- |
-/* |
- * current() callback: return the UChar at the current position without |
- * moving, reading more text into the window first if the position is at |
- * its limit. Returns U_SENTINEL if there is no further text. |
- */ |
-static UChar32 U_CALLCONV |
-unormIteratorCurrent(UCharIterator *api) { |
-    UNormIterator *uni=(UNormIterator *)api; |
- |
-    if(api->index>=api->limit) { |
-        /* nothing buffered at this position: try to extend the window */ |
-        if(!uni->hasNext || !readNext(uni, uni->iter)) { |
-            return U_SENTINEL; |
-        } |
-    } |
-    return uni->chars[api->index]; |
-} |
- |
-/* |
- * next() callback: return the UChar at the current position and advance, |
- * reading more text into the window first if the position is at its limit. |
- * Returns U_SENTINEL, without moving, if there is no further text. |
- */ |
-static UChar32 U_CALLCONV |
-unormIteratorNext(UCharIterator *api) { |
-    UNormIterator *uni=(UNormIterator *)api; |
-    UChar unit; |
- |
-    if(api->index>=api->limit) { |
-        /* nothing buffered at this position: try to extend the window */ |
-        if(!uni->hasNext || !readNext(uni, uni->iter)) { |
-            return U_SENTINEL; |
-        } |
-    } |
- |
-    /* read api->index only now: readNext() may have changed it */ |
-    unit=uni->chars[api->index]; |
-    ++api->index; |
-    return unit; |
-} |
- |
-/* |
- * previous() callback: step back one UChar and return it, reading more |
- * text into the window first if the position is at its start. |
- * Returns U_SENTINEL, without moving, if there is no preceding text. |
- */ |
-static UChar32 U_CALLCONV |
-unormIteratorPrevious(UCharIterator *api) { |
-    UNormIterator *uni=(UNormIterator *)api; |
- |
-    if(api->index<=api->start) { |
-        /* nothing buffered before this position: try to extend the window */ |
-        if(!uni->hasPrevious || !readPrevious(uni, uni->iter)) { |
-            return U_SENTINEL; |
-        } |
-    } |
- |
-    /* read api->index only now: readPrevious() may have changed it */ |
-    --api->index; |
-    return uni->chars[api->index]; |
-} |
- |
-/* |
- * getState() callback: return the state recorded for the current position, |
- * which is UITER_NO_STATE between normalization boundaries. |
- */ |
-static uint32_t U_CALLCONV |
-unormIteratorGetState(const UCharIterator *api) { |
-    const UNormIterator *uni=(const UNormIterator *)api; |
- |
-    /* not uni->state because that may not be at api->index */ |
-    return uni->states[api->index]; |
-} |
- |
-/* |
- * setState() callback: move to the position identified by state. |
- * |
- * api:        the UNormIterator, passed as its embedded UCharIterator |
- * state:      a value obtained earlier from getState() |
- * pErrorCode: in/out error code; nothing is done if it is NULL or already a failure. |
- *             Set to U_ILLEGAL_ARGUMENT_ERROR if api is NULL and to |
- *             U_INDEX_OUTOFBOUNDS_ERROR if state is UITER_NO_STATE; |
- *             uiter_setState() on the source iterator may also set it. |
- * |
- * If state is found in the buffered window, only api->index changes. |
- * Otherwise the window is emptied and state becomes the state at its limit. |
- */ |
-static void U_CALLCONV |
-unormIteratorSetState(UCharIterator *api, uint32_t state, UErrorCode *pErrorCode) { |
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
- /* do nothing */ |
- } else if(api==NULL) { |
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
- } else if(state==UITER_NO_STATE) { |
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
- } else { |
- UNormIterator *uni=(UNormIterator *)api; |
- UCharIterator *iter=((UNormIterator *)api)->iter; |
- /* reposition the source iterator only if it is not already at this state */ |
- if(state!=uni->state) { |
- uni->state=state; |
- /* NOTE(review): a failure reported here is not checked before the code below continues */ |
- uiter_setState(iter, state, pErrorCode); |
- } |
- |
- /* |
- * Try shortcuts: If the requested state is in the array contents |
- * then just set the index there. |
- * |
- * We assume that the state is unique per position! |
- */ |
- if(state==uni->states[api->index]) { |
- return; |
- } else if(state==uni->states[api->limit]) { |
- api->index=api->limit; |
- return; |
- } else { |
- /* search for the index with this state */ |
- int32_t i; |
- |
- for(i=api->start; i<api->limit; ++i) { |
- if(state==uni->states[i]) { |
- api->index=i; |
- return; |
- } |
- } |
- } |
- |
- /* there is no array index for this state, reset for fresh contents */ |
- initIndexes((UNormIterator *)api, iter); |
- uni->states[api->limit]=state; |
- } |
-} |
- |
-/* |
- * Template that unorm_setIter() copies into UNormIterator.api. |
- * The leading data members are NULL/0 here; api.start, api.index and |
- * api.limit are assigned by initIndexes() after the copy. |
- */ |
-static const UCharIterator unormIterator={ |
- NULL, 0, 0, 0, 0, 0, |
- unormIteratorGetIndex, |
- unormIteratorMove, |
- unormIteratorHasNext, |
- unormIteratorHasPrevious, |
- unormIteratorCurrent, |
- unormIteratorNext, |
- unormIteratorPrevious, |
- /* slot left unset; presumably UCharIterator's reserved function pointer (confirm against unicode/uiter.h) */ |
- NULL, |
- unormIteratorGetState, |
- unormIteratorSetState |
-}; |
- |
-/* Setup functions ---------------------------------------------------------- */ |
- |
-/* |
- * Create a UNormIterator, in caller-provided memory if it is large enough |
- * and otherwise on the heap. |
- * |
- * stackMem:     optional memory for the iterator; may be NULL |
- * stackMemSize: number of bytes available at stackMem; values that are too |
- *               small (including zero and negative values) select heap allocation |
- * pErrorCode:   in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the |
- *               heap allocation fails |
- * |
- * Returns the new iterator, or NULL if pErrorCode is NULL, already indicates |
- * a failure, or the allocation fails. The returned iterator carries a no-op |
- * string iterator in its api until unorm_setIter() is called. |
- * Release it with unorm_closeIter(). |
- */ |
-U_CAPI UNormIterator * U_EXPORT2 |
-unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode) { |
-    UNormIterator *uni; |
- |
-    /* argument checking */ |
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
-        return NULL; |
-    } |
- |
-    /* allocate */ |
-    uni=NULL; |
-    /* |
-     * compare as signed integers: without the cast a negative stackMemSize |
-     * is converted to a large unsigned value and passes the size check |
-     */ |
-    if(stackMem!=NULL && stackMemSize>=(int32_t)sizeof(UNormIterator)) { |
-        if(U_ALIGNMENT_OFFSET(stackMem)==0) { |
-            /* already aligned */ |
-            uni=(UNormIterator *)stackMem; |
-        } else { |
-            int32_t align=(int32_t)U_ALIGNMENT_OFFSET_UP(stackMem); |
-            if((stackMemSize-=align)>=(int32_t)sizeof(UNormIterator)) { |
-                /* needs alignment */ |
-                uni=(UNormIterator *)((char *)stackMem+align); |
-            } |
-        } |
-        /* else does not fit */ |
-    } |
- |
-    if(uni!=NULL) { |
-        uni->isStackAllocated=TRUE; |
-    } else { |
-        uni=(UNormIterator *)uprv_malloc(sizeof(UNormIterator)); |
-        if(uni==NULL) { |
-            *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
-            return NULL; |
-        } |
-        uni->isStackAllocated=FALSE; |
-    } |
- |
-    /* |
-     * initialize |
-     * do not memset because that would unnecessarily initialize the arrays |
-     */ |
-    uni->iter=NULL; |
-    uni->chars=uni->charsBuffer; |
-    uni->states=uni->statesBuffer; |
-    uni->capacity=INITIAL_CAPACITY; |
-    uni->state=UITER_NO_STATE; |
-    uni->hasPrevious=uni->hasNext=FALSE; |
-    uni->mode=UNORM_NONE; |
- |
-    /* set a no-op iterator into the api */ |
-    uiter_setString(&uni->api, NULL, 0); |
-    return uni; |
-} |
- |
-/* |
- * Release an iterator obtained from unorm_openIter(). |
- * Frees heap-allocated arrays, and the iterator itself unless it lives in |
- * caller-provided memory. NULL is accepted and ignored. |
- */ |
-U_CAPI void U_EXPORT2 |
-unorm_closeIter(UNormIterator *uni) { |
-    if(uni==NULL) { |
-        return; |
-    } |
- |
-    /* chars and states are allocated in the same memory block */ |
-    if(uni->states!=uni->statesBuffer) { |
-        uprv_free(uni->states); |
-    } |
- |
-    if(!uni->isStackAllocated) { |
-        uprv_free(uni); |
-    } |
-} |
- |
-/* |
- * Attach a source iterator and a normalization mode to uni. |
- * |
- * uni:        iterator from unorm_openIter() |
- * iter:       source iterator; must provide getState and setState |
- * mode:       normalization mode, UNORM_NONE<=mode<UNORM_MODE_COUNT |
- * pErrorCode: in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for a NULL |
- *             uni, an unusable iter or an out-of-range mode |
- * |
- * Returns the UCharIterator embedded in uni that delivers the normalized |
- * text, or NULL on error. If iter or mode is rejected, uni's api is reset to |
- * a no-op string iterator. |
- */ |
-U_CAPI UCharIterator * U_EXPORT2 |
-unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode) { |
-    UBool iterUsable, modeInRange; |
- |
-    /* argument checking */ |
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
-        return NULL; |
-    } |
-    if(uni==NULL) { |
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
-        return NULL; |
-    } |
- |
-    iterUsable= iter!=NULL && iter->getState!=NULL && iter->setState!=NULL; |
-    modeInRange= UNORM_NONE<=mode && mode<UNORM_MODE_COUNT; |
-    if(!iterUsable || !modeInRange) { |
-        /* set a no-op iterator into the api */ |
-        uiter_setString(&uni->api, NULL, 0); |
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
-        return NULL; |
-    } |
- |
-    /* install the callback table, then remember the source and the mode */ |
-    uni->api=unormIterator; |
-    uni->iter=iter; |
-    uni->mode=mode; |
- |
-    /* start with an empty window whose limit carries the source's current state */ |
-    initIndexes(uni, iter); |
-    uni->state=uiter_getState(iter); |
-    uni->states[uni->api.limit]=uni->state; |
- |
-    return &uni->api; |
-} |
- |
-#endif /* uconfig.h switches */ |