Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Unified Diff: icu46/source/common/unorm_it.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « icu46/source/common/unorm_it.h ('k') | icu46/source/common/unormcmp.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: icu46/source/common/unorm_it.c
===================================================================
--- icu46/source/common/unorm_it.c (revision 0)
+++ icu46/source/common/unorm_it.c (revision 0)
@@ -0,0 +1,645 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2008, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unorm_it.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003jan21
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uiter.h"
+#include "unicode/unorm.h"
+#include "unorm_it.h"
+#include "cmemory.h"
+
+/* UNormIterator ------------------------------------------------------------ */
+
+/* Number of UChars that fit into the buffers embedded in struct UNormIterator. */
+enum { INITIAL_CAPACITY=100 };
+
+/*
+ * State of a normalizing iterator: wraps a source UCharIterator and buffers
+ * pieces of its normalized text. The api member is first, and the callbacks
+ * in this file cast the UCharIterator pointer they receive back to
+ * UNormIterator.
+ */
+struct UNormIterator {
+    /*
+     * Iterator interface handed out to the caller.
+     *   api.start: first valid character & state in the arrays
+     *   api.index: current position
+     *   api.limit: one past the last valid character in chars[];
+     *              states[api.limit] is still a valid entry
+     */
+    UCharIterator api;
+
+    /* source iterator whose text gets normalized */
+    UCharIterator *iter;
+
+    /*
+     * Parallel arrays. They point either at the embedded buffers below or
+     * into one shared heap block. states[] stores the source iterator's
+     * getState() value at each normalization boundary and UITER_NO_STATE
+     * for every position in between.
+     */
+    UChar *chars;
+    uint32_t *states;
+
+    /* number of UChars chars[] can hold; states[] has one entry more */
+    int32_t capacity;
+
+    /*
+     * Most recent iter->getState() value, cached so that setState() is only
+     * called when needed; it does not necessarily belong to api.index.
+     */
+    uint32_t state;
+
+    /* is there source text before api.start? */
+    UBool hasPrevious;
+    /* is there source text after api.limit? */
+    UBool hasNext;
+    /* TRUE if this struct lives in caller-provided memory (see unorm_openIter) */
+    UBool isStackAllocated;
+
+    /* normalization mode passed to unorm_next()/unorm_previous() */
+    UNormalizationMode mode;
+
+    /* embedded initial storage for chars[] and states[] */
+    UChar charsBuffer[INITIAL_CAPACITY];
+    uint32_t statesBuffer[INITIAL_CAPACITY+1]; /* one more than charsBuffer[]! */
+};
+
+/*
+ * Empty the arrays and choose where new contents will be placed, depending
+ * on where the source iterator currently stands:
+ *   - at the start of its text: index 0, so the arrays grow forward
+ *   - at the end of its text:   index capacity, so the arrays grow backward
+ *   - anywhere else:            the middle of the arrays
+ * Also sets uni->hasPrevious and uni->hasNext from the source iterator.
+ */
+static void
+initIndexes(UNormIterator *uni, UCharIterator *iter) {
+    /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
+    UCharIterator *api=&uni->api;
+    int32_t origin;
+
+    if(!iter->hasPrevious(iter)) {
+        /* source is at its beginning */
+        origin=0;
+        uni->hasPrevious=FALSE;
+        uni->hasNext=iter->hasNext(iter);
+    } else if(!iter->hasNext(iter)) {
+        /* source is at its end */
+        origin=uni->capacity;
+        uni->hasNext=FALSE;
+        uni->hasPrevious=iter->hasPrevious(iter);
+    } else {
+        /* source is somewhere in between */
+        origin=uni->capacity/2;
+        uni->hasPrevious=TRUE;
+        uni->hasNext=TRUE;
+    }
+
+    api->start=origin;
+    api->index=origin;
+    api->limit=origin;
+}
+
+/*
+ * Replace chars[] and states[] with larger arrays that share one heap block
+ * (states[] first, chars[] right behind it) and copy the buffered text over.
+ *
+ * uni        - iterator whose arrays are replaced
+ * capacity   - new number of UChars; must be larger than uni->capacity.
+ *              states[] gets capacity+1 entries.
+ * addAtStart - TRUE:  the additional space goes in front of the buffered
+ *                     text; the contents and api->start/index/limit are
+ *                     shifted toward the end by the growth amount
+ *              FALSE: the contents keep their indexes and the additional
+ *                     space follows them
+ *
+ * Returns TRUE on success. Returns FALSE, leaving the iterator untouched, if
+ * capacity is not larger than the current one or the allocation fails.
+ */
+static UBool
+reallocArrays(UNormIterator *uni, int32_t capacity, UBool addAtStart) {
+    /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
+    UCharIterator *api=&uni->api;
+
+    uint32_t *states;
+    UChar *chars;
+    int32_t start, limit, delta;
+
+    /* also rejects a capacity that wrapped around to a negative value */
+    if(capacity<=uni->capacity) {
+        return FALSE;
+    }
+
+    states=(uint32_t *)uprv_malloc((capacity+1)*sizeof(uint32_t)+capacity*sizeof(UChar));
+    if(states==NULL) {
+        return FALSE;
+    }
+    chars=(UChar *)(states+(capacity+1));
+
+    start=api->start;
+    limit=api->limit;
+
+    /*
+     * The growth amount must be computed while uni->capacity still holds the
+     * old value; otherwise it is always 0 and no room appears at the start.
+     */
+    delta= addAtStart ? capacity-uni->capacity : 0;
+
+    /*
+     * Copy states[start..limit] (states[limit] is valid too) and
+     * chars[start..limit[, each with its own element size.
+     */
+    uprv_memcpy(states+delta+start, uni->states+start, (limit-start+1)*sizeof(uint32_t));
+    uprv_memcpy(chars+delta+start, uni->chars+start, (limit-start)*sizeof(UChar));
+
+    api->start=start+delta;
+    api->index+=delta;
+    api->limit=limit+delta;
+
+    /* release a previously allocated block; the embedded buffers are not freed */
+    if(uni->states!=uni->statesBuffer) {
+        /* chars and states are allocated in the same memory block */
+        uprv_free(uni->states);
+    }
+
+    uni->chars=chars;
+    uni->states=states;
+    uni->capacity=capacity;
+
+    return TRUE;
+}
+
+/*
+ * Slide the buffered text to the front of the arrays to free space behind
+ * api->limit.
+ *
+ * The first delta array slots are given up. If that cuts into the buffered
+ * text (delta>api->start), further slots are given up until one with a
+ * state other than UITER_NO_STATE, or the limit, is reached.
+ *
+ * Sets api->start to 0 and api->limit to the number of UChars kept.
+ * api->index is not modified here.
+ */
+static void
+moveContentsTowardStart(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) {
+    const int32_t end=api->limit;
+    int32_t from=delta;
+    int32_t to=0;
+
+    if(from>api->start) {
+        /* skip ahead to a position with a known state */
+        for(; from<end && states[from]==UITER_NO_STATE; ++from) {
+        }
+    }
+
+    /* copy characters and states in parallel, low indexes first */
+    for(; from<end; ++from, ++to) {
+        chars[to]=chars[from];
+        states[to]=states[from];
+    }
+
+    /* states[] has one more valid entry than chars[]: the one at the limit */
+    states[to]=states[from];
+
+    api->start=0;
+    api->limit=to;
+}
+
+/*
+ * Slide the buffered text to the end of the arrays to free space in front of
+ * api->start.
+ *
+ * The last delta array slots are given up. If that cuts into the buffered
+ * text (capacity-delta<api->limit), the cut moves further back until it sits
+ * on a state other than UITER_NO_STATE, or on the start.
+ *
+ * Sets api->limit to the capacity and api->start to the new first index.
+ * api->index is not modified here. api must be the api member of a
+ * UNormIterator because the capacity is read through a cast.
+ */
+static void
+moveContentsTowardEnd(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) {
+    const int32_t first=api->start;
+    int32_t to=((UNormIterator *)api)->capacity;
+    int32_t from=to-delta;
+
+    if(from<api->limit) {
+        /* back up to a position with a known state */
+        while(from>first && states[from]==UITER_NO_STATE) {
+            --from;
+        }
+    }
+
+    api->limit=to;
+
+    /* states[] has one more valid entry than chars[]: the one at the limit */
+    states[to]=states[from];
+
+    /* copy characters and states in parallel, high indexes first */
+    while(from>first) {
+        --to;
+        --from;
+        chars[to]=chars[from];
+        states[to]=states[from];
+    }
+
+    api->start=to;
+}
+
+/*
+ * normalize forward from the limit, assume hasNext is true
+ *
+ * Appends the next piece of normalized text, as produced by unorm_next(),
+ * to chars[] at api->limit and records source iterator states for it in
+ * states[]. Returns TRUE if at least one UChar was added. On any failure,
+ * or if unorm_next() produced nothing, sets uni->state=UITER_NO_STATE and
+ * uni->hasNext=FALSE and returns FALSE.
+ */
+static UBool
+readNext(UNormIterator *uni, UCharIterator *iter) {
+ /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
+ UCharIterator *api=&uni->api;
+
+ /* make capacity/4 room at the end of the arrays */
+ int32_t limit, capacity, room;
+ UErrorCode errorCode;
+
+ limit=api->limit;
+ capacity=uni->capacity;
+ room=capacity/4;
+ if(room>(capacity-limit)) {
+ /*
+  * move array contents to make room
+  * (text at the front of the buffer is given up, hence hasPrevious=TRUE,
+  * and api->index is placed at the new limit)
+  */
+ moveContentsTowardStart(api, uni->chars, uni->states, room);
+ api->index=limit=api->limit;
+ uni->hasPrevious=TRUE;
+ }
+
+ /*
+  * normalize starting from the limit position
+  * (iter is repositioned only if its cached state differs from states[limit])
+  */
+ errorCode=U_ZERO_ERROR;
+ if(uni->state!=uni->states[limit]) {
+ uiter_setState(iter, uni->states[limit], &errorCode);
+ if(U_FAILURE(errorCode)) {
+ uni->state=UITER_NO_STATE;
+ uni->hasNext=FALSE;
+ return FALSE;
+ }
+ }
+
+ room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode); /* from here on room is the result length */
+ if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+ /* room is presumably the length that would have been needed - confirm against the unorm_next() documentation */
+ if(room<=capacity) {
+ /*
+  * empty and re-use the arrays
+  * (the result fits if it starts at index 0; only the state at the old limit is kept)
+  */
+ uni->states[0]=uni->states[limit];
+ api->start=api->index=api->limit=limit=0;
+ uni->hasPrevious=TRUE;
+ } else {
+ capacity+=room+100; /* the result is longer than the whole array: grow it */
+ if(!reallocArrays(uni, capacity, FALSE)) {
+ uni->state=UITER_NO_STATE;
+ uni->hasNext=FALSE;
+ return FALSE;
+ }
+ limit=api->limit;
+ }
+
+ errorCode=U_ZERO_ERROR; /* retry once from the same source position */
+ uiter_setState(iter, uni->states[limit], &errorCode);
+ room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode);
+ }
+ if(U_FAILURE(errorCode) || room==0) {
+ uni->state=UITER_NO_STATE;
+ uni->hasNext=FALSE;
+ return FALSE;
+ }
+
+ /* room>0 */
+ ++limit; /* leave the known states[limit] alone */
+ for(--room; room>0; --room) {
+ /* set unknown states for all but the normalization boundaries */
+ uni->states[limit++]=UITER_NO_STATE;
+ }
+ uni->states[limit]=uni->state=uiter_getState(iter); /* state at the new limit, also cached in uni->state */
+ uni->hasNext=iter->hasNext(iter);
+ api->limit=limit;
+ return TRUE;
+}
+
+/*
+ * normalize backward from the start, assume hasPrevious is true
+ *
+ * Prepends the preceding piece of normalized text, as produced by
+ * unorm_previous(), to chars[] in front of api->start and records source
+ * iterator states for it in states[]. Returns TRUE if at least one UChar was
+ * added. On any failure, or if unorm_previous() produced nothing, sets
+ * uni->state=UITER_NO_STATE and uni->hasPrevious=FALSE and returns FALSE.
+ */
+static UBool
+readPrevious(UNormIterator *uni, UCharIterator *iter) {
+ /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
+ UCharIterator *api=&uni->api;
+
+ /* make capacity/4 room at the start of the arrays */
+ int32_t start, capacity, room;
+ UErrorCode errorCode;
+
+ start=api->start;
+ capacity=uni->capacity;
+ room=capacity/4;
+ if(room>start) {
+ /*
+  * move array contents to make room
+  * (text at the end of the buffer is given up, hence hasNext=TRUE,
+  * and api->index is placed at the new start)
+  */
+ moveContentsTowardEnd(api, uni->chars, uni->states, room);
+ api->index=start=api->start;
+ uni->hasNext=TRUE;
+ }
+
+ /*
+  * normalize ending at the start position
+  * (iter is repositioned only if its cached state differs from states[start])
+  */
+ errorCode=U_ZERO_ERROR;
+ if(uni->state!=uni->states[start]) {
+ uiter_setState(iter, uni->states[start], &errorCode);
+ if(U_FAILURE(errorCode)) {
+ uni->state=UITER_NO_STATE;
+ uni->hasPrevious=FALSE;
+ return FALSE;
+ }
+ }
+
+ room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode); /* output goes to chars[0..start[; room becomes the result length */
+ if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+ /* room is presumably the length that would have been needed - confirm against the unorm_previous() documentation */
+ if(room<=capacity) {
+ /*
+  * empty and re-use the arrays
+  * (the result fits if it ends at index capacity; only the state at the old start is kept)
+  */
+ uni->states[capacity]=uni->states[start];
+ api->start=api->index=api->limit=start=capacity;
+ uni->hasNext=TRUE;
+ } else {
+ capacity+=room+100; /* the result is longer than the whole array: grow it */
+ if(!reallocArrays(uni, capacity, TRUE)) {
+ uni->state=UITER_NO_STATE;
+ uni->hasPrevious=FALSE;
+ return FALSE;
+ }
+ start=api->start;
+ }
+
+ errorCode=U_ZERO_ERROR; /* retry once from the same source position */
+ uiter_setState(iter, uni->states[start], &errorCode);
+ room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode);
+ }
+ if(U_FAILURE(errorCode) || room==0) {
+ uni->state=UITER_NO_STATE;
+ uni->hasPrevious=FALSE;
+ return FALSE;
+ }
+
+ /* room>0 */
+ do {
+ /* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */
+ uni->chars[--start]=uni->chars[--room];
+ /* set unknown states for all but the normalization boundaries */
+ uni->states[start]=UITER_NO_STATE;
+ } while(room>0);
+ uni->states[start]=uni->state=uiter_getState(iter); /* state at the new start replaces the UITER_NO_STATE just written there */
+ uni->hasPrevious=iter->hasPrevious(iter);
+ api->start=start;
+ return TRUE;
+}
+
+/* Iterator runtime API functions ------------------------------------------- */
+
+/*
+ * getIndex() callback. This iterator does not track absolute positions in
+ * the normalized text: it reports 0 for UITER_ZERO and UITER_START,
+ * UITER_UNKNOWN_INDEX for UITER_CURRENT, UITER_LIMIT and UITER_LENGTH,
+ * and -1 for any other origin value. The api argument is not used.
+ */
+static int32_t U_CALLCONV
+unormIteratorGetIndex(UCharIterator *api, UCharIteratorOrigin origin) {
+    if(origin==UITER_ZERO || origin==UITER_START) {
+        return 0;
+    }
+    if(origin==UITER_CURRENT || origin==UITER_LIMIT || origin==UITER_LENGTH) {
+        return UITER_UNKNOWN_INDEX;
+    }
+
+    /* not a valid origin; should never get here */
+    return -1;
+}
+
+/*
+ * move() callback: position the iterator relative to origin, then step by
+ * delta normalized UChars, reading more text into the buffer as needed.
+ * Stepping stops early at either end of the text.
+ *
+ * Returns 0 if the iterator ends up at the very beginning of the text,
+ * UITER_UNKNOWN_INDEX for any other resulting position, and -1 if origin is
+ * not a valid value.
+ */
+static int32_t U_CALLCONV
+unormIteratorMove(UCharIterator *api, int32_t delta, UCharIteratorOrigin origin) {
+    UNormIterator *self=(UNormIterator *)api;
+    UCharIterator *source=self->iter;
+    int32_t target;
+
+    if(origin==UITER_ZERO || origin==UITER_START) {
+        if(!self->hasPrevious) {
+            /* we already have the beginning of the normalized text */
+            api->index=api->start;
+        } else {
+            /* restart from the beginning with empty arrays */
+            source->move(source, 0, UITER_START);
+            api->start=api->index=api->limit=0;
+            self->states[api->limit]=self->state=uiter_getState(source);
+            self->hasPrevious=FALSE;
+            self->hasNext=source->hasNext(source);
+        }
+    } else if(origin==UITER_LIMIT || origin==UITER_LENGTH) {
+        if(!self->hasNext) {
+            /* we already have the end of the normalized text */
+            api->index=api->limit;
+        } else {
+            /* restart from the end with empty arrays */
+            source->move(source, 0, UITER_LIMIT);
+            api->start=api->index=api->limit=self->capacity;
+            self->states[api->limit]=self->state=uiter_getState(source);
+            self->hasPrevious=source->hasPrevious(source);
+            self->hasNext=FALSE;
+        }
+    } else if(origin!=UITER_CURRENT) {
+        return -1; /* Error */
+    }
+
+    if(delta>0) {
+        /* go forward until the requested position is in the buffer */
+        for(;;) {
+            target=api->index+delta;
+            if(target<=api->limit) {
+                api->index=target; /* position reached */
+                break;
+            }
+
+            /* consume the rest of the buffer, then normalize further */
+            delta=target-api->limit;
+            api->index=api->limit;
+            if(!(self->hasNext && readNext(self, source))) {
+                break; /* reached end of text */
+            }
+        }
+    } else if(delta<0) {
+        /* go backward until the requested position is in the buffer */
+        for(;;) {
+            target=api->index+delta;
+            if(target>=api->start) {
+                api->index=target; /* position reached */
+                break;
+            }
+
+            /* consume the front of the buffer, then normalize further */
+            delta=target-api->start;
+            api->index=api->start;
+            if(!(self->hasPrevious && readPrevious(self, source))) {
+                break; /* reached start of text */
+            }
+        }
+    }
+    /* delta==0: nothing to do */
+
+    return (api->index==api->start && !self->hasPrevious) ? 0 : UITER_UNKNOWN_INDEX;
+}
+
+/*
+ * hasNext() callback: TRUE if buffered text remains after the current index
+ * or the source has more text beyond the buffer.
+ */
+static UBool U_CALLCONV
+unormIteratorHasNext(UCharIterator *api) {
+    if(api->index<api->limit) {
+        return TRUE;
+    }
+    return ((UNormIterator *)api)->hasNext ? TRUE : FALSE;
+}
+
+/*
+ * hasPrevious() callback: TRUE if buffered text exists before the current
+ * index or the source has more text in front of the buffer.
+ */
+static UBool U_CALLCONV
+unormIteratorHasPrevious(UCharIterator *api) {
+    if(api->index>api->start) {
+        return TRUE;
+    }
+    return ((UNormIterator *)api)->hasPrevious ? TRUE : FALSE;
+}
+
+/*
+ * current() callback: return the normalized UChar at the current index
+ * without moving, reading more text into the buffer if the index is at the
+ * limit. Returns U_SENTINEL if there is no further text.
+ */
+static UChar32 U_CALLCONV
+unormIteratorCurrent(UCharIterator *api) {
+    UNormIterator *self=(UNormIterator *)api;
+
+    if(api->index>=api->limit) {
+        /* buffer exhausted: try to normalize more */
+        if(!self->hasNext || !readNext(self, self->iter)) {
+            return U_SENTINEL;
+        }
+    }
+    return self->chars[api->index];
+}
+
+/*
+ * next() callback: return the normalized UChar at the current index and
+ * advance by one, reading more text into the buffer if the index is at the
+ * limit. Returns U_SENTINEL, without moving, if there is no further text.
+ */
+static UChar32 U_CALLCONV
+unormIteratorNext(UCharIterator *api) {
+    UNormIterator *self=(UNormIterator *)api;
+    UChar c;
+
+    if(api->index>=api->limit) {
+        /* buffer exhausted: try to normalize more */
+        if(!self->hasNext || !readNext(self, self->iter)) {
+            return U_SENTINEL;
+        }
+    }
+    c=self->chars[api->index];
+    ++api->index;
+    return c;
+}
+
+/*
+ * previous() callback: step back by one and return the normalized UChar
+ * there, reading more text into the buffer if the index is at the start.
+ * Returns U_SENTINEL, without moving, if there is no preceding text.
+ */
+static UChar32 U_CALLCONV
+unormIteratorPrevious(UCharIterator *api) {
+    UNormIterator *self=(UNormIterator *)api;
+
+    if(api->index<=api->start) {
+        /* nothing buffered before the index: try to normalize more */
+        if(!self->hasPrevious || !readPrevious(self, self->iter)) {
+            return U_SENTINEL;
+        }
+    }
+    --api->index;
+    return self->chars[api->index];
+}
+
+/*
+ * getState() callback: return the entry of states[] for the current index.
+ * That entry is UITER_NO_STATE when the index is not on a normalization
+ * boundary.
+ */
+static uint32_t U_CALLCONV
+unormIteratorGetState(const UCharIterator *api) {
+    const UNormIterator *self=(const UNormIterator *)api;
+
+    /* not self->state because that may not be at api->index */
+    return self->states[api->index];
+}
+
+/*
+ * setState() callback: move the iterator to the position identified by a
+ * state value previously obtained from getState().
+ *
+ * Does nothing if pErrorCode is NULL or already indicates a failure.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR if api is NULL and
+ * U_INDEX_OUTOFBOUNDS_ERROR if state is UITER_NO_STATE.
+ *
+ * Otherwise the source iterator is set to the state (unless it is already
+ * the cached one). If the state is found in states[], only api->index is
+ * changed; if not, the arrays are emptied and restarted at that state.
+ */
+static void U_CALLCONV
+unormIteratorSetState(UCharIterator *api, uint32_t state, UErrorCode *pErrorCode) {
+    UNormIterator *self;
+    UCharIterator *source;
+    int32_t pos;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+    if(api==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if(state==UITER_NO_STATE) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+
+    self=(UNormIterator *)api;
+    source=self->iter;
+    if(self->state!=state) {
+        self->state=state;
+        uiter_setState(source, state, pErrorCode);
+    }
+
+    /*
+     * Shortcuts: a state that is already in the arrays only needs an index
+     * change. This assumes that the state is unique per position!
+     */
+    if(self->states[api->index]==state) {
+        return;
+    }
+    if(self->states[api->limit]==state) {
+        api->index=api->limit;
+        return;
+    }
+    for(pos=api->start; pos<api->limit; ++pos) {
+        if(self->states[pos]==state) {
+            api->index=pos;
+            return;
+        }
+    }
+
+    /* there is no array index for this state, reset for fresh contents */
+    initIndexes(self, source);
+    self->states[api->limit]=state;
+}
+
+static const UCharIterator unormIterator={ /* template that unorm_setIter() copies into uni->api */
+ NULL, 0, 0, 0, 0, 0, /* data fields; start/index/limit are set afterwards by initIndexes() */
+ unormIteratorGetIndex,
+ unormIteratorMove,
+ unormIteratorHasNext,
+ unormIteratorHasPrevious,
+ unormIteratorCurrent,
+ unormIteratorNext,
+ unormIteratorPrevious,
+ NULL, /* presumably a reserved function slot - confirm against unicode/uiter.h */
+ unormIteratorGetState,
+ unormIteratorSetState
+};
+
+/* Setup functions ---------------------------------------------------------- */
+
+/*
+ * Create a UNormIterator, either inside caller-provided memory or on the heap.
+ *
+ * stackMem     - optional caller-provided memory; it is used if, after
+ *                alignment, at least sizeof(UNormIterator) bytes remain
+ * stackMemSize - size of stackMem in bytes; a value that is too small,
+ *                including a negative one, selects heap allocation
+ * pErrorCode   - in/out error code; nothing is done if it is NULL or already
+ *                a failure; set to U_MEMORY_ALLOCATION_ERROR if the heap
+ *                allocation fails
+ *
+ * Returns the new iterator, using the embedded buffers and with a no-op
+ * string iterator installed in its api, or NULL on error.
+ * Release it with unorm_closeIter().
+ */
+U_CAPI UNormIterator * U_EXPORT2
+unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode) {
+    /* signed so that comparisons with stackMemSize are not done in unsigned arithmetic */
+    const int32_t needed=(int32_t)sizeof(UNormIterator);
+    UNormIterator *uni;
+
+    /* argument checking */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+
+    /* allocate */
+    uni=NULL;
+    if(stackMem!=NULL && stackMemSize>=needed) {
+        if(U_ALIGNMENT_OFFSET(stackMem)==0) {
+            /* already aligned */
+            uni=(UNormIterator *)stackMem;
+        } else {
+            int32_t align=(int32_t)U_ALIGNMENT_OFFSET_UP(stackMem);
+            if((stackMemSize-=align)>=needed) {
+                /* needs alignment */
+                uni=(UNormIterator *)((char *)stackMem+align);
+            }
+        }
+        /* else does not fit */
+    }
+
+    if(uni!=NULL) {
+        uni->isStackAllocated=TRUE;
+    } else {
+        uni=(UNormIterator *)uprv_malloc(sizeof(UNormIterator));
+        if(uni==NULL) {
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        uni->isStackAllocated=FALSE;
+    }
+
+    /*
+     * initialize
+     * do not memset because that would unnecessarily initialize the arrays
+     */
+    uni->iter=NULL;
+    uni->chars=uni->charsBuffer;
+    uni->states=uni->statesBuffer;
+    uni->capacity=INITIAL_CAPACITY;
+    uni->state=UITER_NO_STATE;
+    uni->hasPrevious=uni->hasNext=FALSE;
+    uni->mode=UNORM_NONE;
+
+    /* set a no-op iterator into the api */
+    uiter_setString(&uni->api, NULL, 0);
+    return uni;
+}
+
+/*
+ * Release a UNormIterator: frees the heap block for chars[]/states[] if one
+ * was allocated, and the struct itself unless it lives in caller-provided
+ * memory. A NULL argument is ignored. The source iterator is not touched.
+ */
+U_CAPI void U_EXPORT2
+unorm_closeIter(UNormIterator *uni) {
+    if(uni==NULL) {
+        return;
+    }
+
+    /* chars and states are allocated in the same memory block */
+    if(uni->states!=uni->statesBuffer) {
+        uprv_free(uni->states);
+    }
+
+    if(!uni->isStackAllocated) {
+        uprv_free(uni);
+    }
+}
+
+/*
+ * Attach a source iterator and a normalization mode to uni and return the
+ * UCharIterator through which the normalized text is read.
+ *
+ * uni        - iterator from unorm_openIter(); NULL sets U_ILLEGAL_ARGUMENT_ERROR
+ * iter       - source iterator; must be non-NULL and provide getState and
+ *              setState functions
+ * mode       - must satisfy UNORM_NONE<=mode<UNORM_MODE_COUNT
+ * pErrorCode - in/out error code; nothing is done if it is NULL or already
+ *              a failure
+ *
+ * On an invalid iter or mode, a no-op string iterator is installed in
+ * uni->api, U_ILLEGAL_ARGUMENT_ERROR is set and NULL is returned.
+ * Otherwise returns &uni->api, positioned at the source iterator's current
+ * position with empty arrays.
+ */
+U_CAPI UCharIterator * U_EXPORT2
+unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode) {
+    /* argument checking */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    if(uni==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    if(!(iter!=NULL && iter->getState!=NULL && iter->setState!=NULL &&
+         UNORM_NONE<=mode && mode<UNORM_MODE_COUNT)
+    ) {
+        /* set a no-op iterator into the api */
+        uiter_setString(&uni->api, NULL, 0);
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+    /* install the callback table, then remember source and mode */
+    uni->api=unormIterator;
+    uni->iter=iter;
+    uni->mode=mode;
+
+    /* start with empty arrays at the source iterator's current position */
+    initIndexes(uni, iter);
+    uni->state=uiter_getState(iter);
+    uni->states[uni->api.limit]=uni->state;
+
+    return &uni->api;
+}
+
+#endif /* uconfig.h switches */
Property changes on: icu46/source/common/unorm_it.c
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « icu46/source/common/unorm_it.h ('k') | icu46/source/common/unormcmp.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698