Index: icu46/source/common/propsvec.c |
=================================================================== |
--- icu46/source/common/propsvec.c (revision 0) |
+++ icu46/source/common/propsvec.c (revision 0) |
@@ -0,0 +1,523 @@ |
+/* |
+******************************************************************************* |
+* |
+* Copyright (C) 2002-2010, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+******************************************************************************* |
+* file name: propsvec.c |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* created on: 2002feb22 |
+* created by: Markus W. Scherer |
+* |
+* Store bits (Unicode character properties) in bit set vectors. |
+*/ |
+ |
+#include <stdlib.h> |
+#include "unicode/utypes.h" |
+#include "cmemory.h" |
+#include "utrie.h" |
+#include "utrie2.h" |
+#include "uarrsort.h" |
+#include "propsvec.h" |
+ |
+struct UPropsVectors { |
+ uint32_t *v; |
+ int32_t columns; /* number of columns, plus two for start & limit values */ |
+ int32_t maxRows; |
+ int32_t rows; |
+ int32_t prevRow; /* search optimization: remember last row seen */ |
+ UBool isCompacted; |
+}; |
+ |
+#define UPVEC_INITIAL_ROWS (1<<12) |
+#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) |
+#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) |
+ |
+U_CAPI UPropsVectors * U_EXPORT2 |
+upvec_open(int32_t columns, UErrorCode *pErrorCode) { |
+ UPropsVectors *pv; |
+ uint32_t *v, *row; |
+ uint32_t cp; |
+ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return NULL; |
+ } |
+ if(columns<1) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return NULL; |
+ } |
+ columns+=2; /* count range start and limit columns */ |
+ |
+ pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); |
+ v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); |
+ if(pv==NULL || v==NULL) { |
+ uprv_free(pv); |
+ uprv_free(v); |
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
+ return NULL; |
+ } |
+ uprv_memset(pv, 0, sizeof(UPropsVectors)); |
+ pv->v=v; |
+ pv->columns=columns; |
+ pv->maxRows=UPVEC_INITIAL_ROWS; |
+ pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); |
+ |
+ /* set the all-Unicode row and the special-value rows */ |
+ row=pv->v; |
+ uprv_memset(row, 0, pv->rows*columns*4); |
+ row[0]=0; |
+ row[1]=0x110000; |
+ row+=columns; |
+ for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { |
+ row[0]=cp; |
+ row[1]=cp+1; |
+ row+=columns; |
+ } |
+ return pv; |
+} |
+ |
+U_CAPI void U_EXPORT2 |
+upvec_close(UPropsVectors *pv) { |
+ if(pv!=NULL) { |
+ uprv_free(pv->v); |
+ uprv_free(pv); |
+ } |
+} |
+ |
+static uint32_t * |
+_findRow(UPropsVectors *pv, UChar32 rangeStart) { |
+ uint32_t *row; |
+ int32_t columns, i, start, limit, prevRow, rows; |
+ |
+ columns=pv->columns; |
+ rows=limit=pv->rows; |
+ prevRow=pv->prevRow; |
+ |
+ /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ |
+ row=pv->v+prevRow*columns; |
+ if(rangeStart>=(UChar32)row[0]) { |
+ if(rangeStart<(UChar32)row[1]) { |
+ /* same row as last seen */ |
+ return row; |
+ } else if(rangeStart<(UChar32)(row+=columns)[1]) { |
+ /* next row after the last one */ |
+ pv->prevRow=prevRow+1; |
+ return row; |
+ } else if(rangeStart<(UChar32)(row+=columns)[1]) { |
+ /* second row after the last one */ |
+ pv->prevRow=prevRow+2; |
+ return row; |
+ } else if((rangeStart-(UChar32)row[1])<10) { |
+ /* we are close, continue looping */ |
+ prevRow+=2; |
+ do { |
+ ++prevRow; |
+ row+=columns; |
+ } while(rangeStart>=(UChar32)row[1]); |
+ pv->prevRow=prevRow; |
+ return row; |
+ } |
+ } else if(rangeStart<(UChar32)pv->v[1]) { |
+ /* the very first row */ |
+ pv->prevRow=0; |
+ return pv->v; |
+ } |
+ |
+ /* do a binary search for the start of the range */ |
+ start=0; |
+ while(start<limit-1) { |
+ i=(start+limit)/2; |
+ row=pv->v+i*columns; |
+ if(rangeStart<(UChar32)row[0]) { |
+ limit=i; |
+ } else if(rangeStart<(UChar32)row[1]) { |
+ pv->prevRow=i; |
+ return row; |
+ } else { |
+ start=i; |
+ } |
+ } |
+ |
+ /* must be found because all ranges together always cover all of Unicode */ |
+ pv->prevRow=start; |
+ return pv->v+start*columns; |
+} |
+ |
+U_CAPI void U_EXPORT2 |
+upvec_setValue(UPropsVectors *pv, |
+ UChar32 start, UChar32 end, |
+ int32_t column, |
+ uint32_t value, uint32_t mask, |
+ UErrorCode *pErrorCode) { |
+ uint32_t *firstRow, *lastRow; |
+ int32_t columns; |
+ UChar32 limit; |
+ UBool splitFirstRow, splitLastRow; |
+ |
+ /* argument checking */ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ if( pv==NULL || |
+ start<0 || start>end || end>UPVEC_MAX_CP || |
+ column<0 || column>=(pv->columns-2) |
+ ) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return; |
+ } |
+ if(pv->isCompacted) { |
+ *pErrorCode=U_NO_WRITE_PERMISSION; |
+ return; |
+ } |
+ limit=end+1; |
+ |
+ /* initialize */ |
+ columns=pv->columns; |
+ column+=2; /* skip range start and limit columns */ |
+ value&=mask; |
+ |
+ /* find the rows whose ranges overlap with the input range */ |
+ |
+ /* find the first and last rows, always successful */ |
+ firstRow=_findRow(pv, start); |
+ lastRow=_findRow(pv, end); |
+ |
+ /* |
+ * Rows need to be split if they partially overlap with the |
+ * input range (only possible for the first and last rows) |
+ * and if their value differs from the input value. |
+ */ |
+ splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); |
+ splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); |
+ |
+ /* split first/last rows if necessary */ |
+ if(splitFirstRow || splitLastRow) { |
+ int32_t count, rows; |
+ |
+ rows=pv->rows; |
+ if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { |
+ uint32_t *newVectors; |
+ int32_t newMaxRows; |
+ |
+ if(pv->maxRows<UPVEC_MEDIUM_ROWS) { |
+ newMaxRows=UPVEC_MEDIUM_ROWS; |
+ } else if(pv->maxRows<UPVEC_MAX_ROWS) { |
+ newMaxRows=UPVEC_MAX_ROWS; |
+ } else { |
+ /* Implementation bug, or UPVEC_MAX_ROWS too low. */ |
+ *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
+ return; |
+ } |
+ newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); |
+ if(newVectors==NULL) { |
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } |
+ uprv_memcpy(newVectors, pv->v, rows*columns*4); |
+ firstRow=newVectors+(firstRow-pv->v); |
+ lastRow=newVectors+(lastRow-pv->v); |
+ uprv_free(pv->v); |
+ pv->v=newVectors; |
+ pv->maxRows=newMaxRows; |
+ } |
+ |
+ /* count the number of row cells to move after the last row, and move them */ |
+ count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); |
+ if(count>0) { |
+ uprv_memmove( |
+ lastRow+(1+splitFirstRow+splitLastRow)*columns, |
+ lastRow+columns, |
+ count*4); |
+ } |
+ pv->rows=rows+splitFirstRow+splitLastRow; |
+ |
+ /* split the first row, and move the firstRow pointer to the second part */ |
+ if(splitFirstRow) { |
+ /* copy all affected rows up one and move the lastRow pointer */ |
+ count = (int32_t)((lastRow-firstRow)+columns); |
+ uprv_memmove(firstRow+columns, firstRow, count*4); |
+ lastRow+=columns; |
+ |
+ /* split the range and move the firstRow pointer */ |
+ firstRow[1]=firstRow[columns]=(uint32_t)start; |
+ firstRow+=columns; |
+ } |
+ |
+ /* split the last row */ |
+ if(splitLastRow) { |
+ /* copy the last row data */ |
+ uprv_memcpy(lastRow+columns, lastRow, columns*4); |
+ |
+ /* split the range and move the firstRow pointer */ |
+ lastRow[1]=lastRow[columns]=(uint32_t)limit; |
+ } |
+ } |
+ |
+ /* set the "row last seen" to the last row for the range */ |
+ pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); |
+ |
+ /* set the input value in all remaining rows */ |
+ firstRow+=column; |
+ lastRow+=column; |
+ mask=~mask; |
+ for(;;) { |
+ *firstRow=(*firstRow&mask)|value; |
+ if(firstRow==lastRow) { |
+ break; |
+ } |
+ firstRow+=columns; |
+ } |
+} |
+ |
+U_CAPI uint32_t U_EXPORT2 |
+upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { |
+ uint32_t *row; |
+ UPropsVectors *ncpv; |
+ |
+ if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { |
+ return 0; |
+ } |
+ ncpv=(UPropsVectors *)pv; |
+ row=_findRow(ncpv, c); |
+ return row[2+column]; |
+} |
+ |
+U_CAPI uint32_t * U_EXPORT2 |
+upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, |
+ UChar32 *pRangeStart, UChar32 *pRangeEnd) { |
+ uint32_t *row; |
+ int32_t columns; |
+ |
+ if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { |
+ return NULL; |
+ } |
+ |
+ columns=pv->columns; |
+ row=pv->v+rowIndex*columns; |
+ if(pRangeStart!=NULL) { |
+ *pRangeStart=(UChar32)row[0]; |
+ } |
+ if(pRangeEnd!=NULL) { |
+ *pRangeEnd=(UChar32)row[1]-1; |
+ } |
+ return row+2; |
+} |
+ |
+static int32_t U_CALLCONV |
+upvec_compareRows(const void *context, const void *l, const void *r) { |
+ const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; |
+ const UPropsVectors *pv=(const UPropsVectors *)context; |
+ int32_t i, count, columns; |
+ |
+ count=columns=pv->columns; /* includes start/limit columns */ |
+ |
+ /* start comparing after start/limit but wrap around to them */ |
+ i=2; |
+ do { |
+ if(left[i]!=right[i]) { |
+ return left[i]<right[i] ? -1 : 1; |
+ } |
+ if(++i==columns) { |
+ i=0; |
+ } |
+ } while(--count>0); |
+ |
+ return 0; |
+} |
+ |
+U_CAPI void U_EXPORT2 |
+upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { |
+ uint32_t *row; |
+ int32_t i, columns, valueColumns, rows, count; |
+ UChar32 start, limit; |
+ |
+ /* argument checking */ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ if(handler==NULL) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return; |
+ } |
+ if(pv->isCompacted) { |
+ return; |
+ } |
+ |
+ /* Set the flag now: Sorting and compacting destroys the builder data structure. */ |
+ pv->isCompacted=TRUE; |
+ |
+ rows=pv->rows; |
+ columns=pv->columns; |
+ valueColumns=columns-2; /* not counting start & limit */ |
+ |
+ /* sort the properties vectors to find unique vector values */ |
+ uprv_sortArray(pv->v, rows, columns*4, |
+ upvec_compareRows, pv, FALSE, pErrorCode); |
+ if(U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ |
+ /* |
+ * Find and set the special values. |
+ * This has to do almost the same work as the compaction below, |
+ * to find the indexes where the special-value rows will move. |
+ */ |
+ row=pv->v; |
+ count=-valueColumns; |
+ for(i=0; i<rows; ++i) { |
+ start=(UChar32)row[0]; |
+ |
+ /* count a new values vector if it is different from the current one */ |
+ if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { |
+ count+=valueColumns; |
+ } |
+ |
+ if(start>=UPVEC_FIRST_SPECIAL_CP) { |
+ handler(context, start, start, count, row+2, valueColumns, pErrorCode); |
+ if(U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ } |
+ |
+ row+=columns; |
+ } |
+ |
+ /* count is at the beginning of the last vector, add valueColumns to include that last vector */ |
+ count+=valueColumns; |
+ |
+ /* Call the handler once more to signal the start of delivering real values. */ |
+ handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, |
+ count, row-valueColumns, valueColumns, pErrorCode); |
+ if(U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ |
+ /* |
+ * Move vector contents up to a contiguous array with only unique |
+ * vector values, and call the handler function for each vector. |
+ * |
+ * This destroys the Properties Vector structure and replaces it |
+ * with an array of just vector values. |
+ */ |
+ row=pv->v; |
+ count=-valueColumns; |
+ for(i=0; i<rows; ++i) { |
+ /* fetch these first before memmove() may overwrite them */ |
+ start=(UChar32)row[0]; |
+ limit=(UChar32)row[1]; |
+ |
+ /* add a new values vector if it is different from the current one */ |
+ if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { |
+ count+=valueColumns; |
+ uprv_memmove(pv->v+count, row+2, valueColumns*4); |
+ } |
+ |
+ if(start<UPVEC_FIRST_SPECIAL_CP) { |
+ handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode); |
+ if(U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ } |
+ |
+ row+=columns; |
+ } |
+ |
+ /* count is at the beginning of the last vector, add one to include that last vector */ |
+ pv->rows=count/valueColumns+1; |
+} |
+ |
+U_CAPI const uint32_t * U_EXPORT2 |
+upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { |
+ if(!pv->isCompacted) { |
+ return NULL; |
+ } |
+ if(pRows!=NULL) { |
+ *pRows=pv->rows; |
+ } |
+ if(pColumns!=NULL) { |
+ *pColumns=pv->columns-2; |
+ } |
+ return pv->v; |
+} |
+ |
+U_CAPI uint32_t * U_EXPORT2 |
+upvec_cloneArray(const UPropsVectors *pv, |
+ int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { |
+ uint32_t *clonedArray; |
+ int32_t byteLength; |
+ |
+ if(U_FAILURE(*pErrorCode)) { |
+ return NULL; |
+ } |
+ if(!pv->isCompacted) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return NULL; |
+ } |
+ byteLength=pv->rows*(pv->columns-2)*4; |
+ clonedArray=(uint32_t *)uprv_malloc(byteLength); |
+ if(clonedArray==NULL) { |
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
+ return NULL; |
+ } |
+ uprv_memcpy(clonedArray, pv->v, byteLength); |
+ if(pRows!=NULL) { |
+ *pRows=pv->rows; |
+ } |
+ if(pColumns!=NULL) { |
+ *pColumns=pv->columns-2; |
+ } |
+ return clonedArray; |
+} |
+ |
+U_CAPI UTrie2 * U_EXPORT2 |
+upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { |
+ UPVecToUTrie2Context toUTrie2={ NULL }; |
+ upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); |
+ utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); |
+ if(U_FAILURE(*pErrorCode)) { |
+ utrie2_close(toUTrie2.trie); |
+ toUTrie2.trie=NULL; |
+ } |
+ return toUTrie2.trie; |
+} |
+ |
+/* |
+ * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts |
+ * some 16-bit field and builds and returns a UTrie2. |
+ */ |
+ |
+U_CAPI void U_CALLCONV |
+upvec_compactToUTrie2Handler(void *context, |
+ UChar32 start, UChar32 end, |
+ int32_t rowIndex, uint32_t *row, int32_t columns, |
+ UErrorCode *pErrorCode) { |
+ UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; |
+ if(start<UPVEC_FIRST_SPECIAL_CP) { |
+ utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); |
+ } else { |
+ switch(start) { |
+ case UPVEC_INITIAL_VALUE_CP: |
+ toUTrie2->initialValue=rowIndex; |
+ break; |
+ case UPVEC_ERROR_VALUE_CP: |
+ toUTrie2->errorValue=rowIndex; |
+ break; |
+ case UPVEC_START_REAL_VALUES_CP: |
+ toUTrie2->maxValue=rowIndex; |
+ if(rowIndex>0xffff) { |
+ /* too many rows for a 16-bit trie */ |
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
+ } else { |
+ toUTrie2->trie=utrie2_open(toUTrie2->initialValue, |
+ toUTrie2->errorValue, pErrorCode); |
+ } |
+ break; |
+ default: |
+ break; |
+ } |
+ } |
+} |
Property changes on: icu46/source/common/propsvec.c |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |