OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2002-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: propsvec.c |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2002feb22 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * Store bits (Unicode character properties) in bit set vectors. |
| 17 */ |
| 18 |
| 19 #include <stdlib.h> |
| 20 #include "unicode/utypes.h" |
| 21 #include "cmemory.h" |
| 22 #include "utrie.h" |
| 23 #include "utrie2.h" |
| 24 #include "uarrsort.h" |
| 25 #include "propsvec.h" |
| 26 |
| 27 struct UPropsVectors { |
| 28 uint32_t *v; |
| 29 int32_t columns; /* number of columns, plus two for start & limit values */ |
| 30 int32_t maxRows; |
| 31 int32_t rows; |
| 32 int32_t prevRow; /* search optimization: remember last row seen */ |
| 33 UBool isCompacted; |
| 34 }; |
| 35 |
| 36 #define UPVEC_INITIAL_ROWS (1<<12) |
| 37 #define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) |
| 38 #define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) |
| 39 |
| 40 U_CAPI UPropsVectors * U_EXPORT2 |
| 41 upvec_open(int32_t columns, UErrorCode *pErrorCode) { |
| 42 UPropsVectors *pv; |
| 43 uint32_t *v, *row; |
| 44 uint32_t cp; |
| 45 |
| 46 if(U_FAILURE(*pErrorCode)) { |
| 47 return NULL; |
| 48 } |
| 49 if(columns<1) { |
| 50 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 51 return NULL; |
| 52 } |
| 53 columns+=2; /* count range start and limit columns */ |
| 54 |
| 55 pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); |
| 56 v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); |
| 57 if(pv==NULL || v==NULL) { |
| 58 uprv_free(pv); |
| 59 uprv_free(v); |
| 60 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 61 return NULL; |
| 62 } |
| 63 uprv_memset(pv, 0, sizeof(UPropsVectors)); |
| 64 pv->v=v; |
| 65 pv->columns=columns; |
| 66 pv->maxRows=UPVEC_INITIAL_ROWS; |
| 67 pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); |
| 68 |
| 69 /* set the all-Unicode row and the special-value rows */ |
| 70 row=pv->v; |
| 71 uprv_memset(row, 0, pv->rows*columns*4); |
| 72 row[0]=0; |
| 73 row[1]=0x110000; |
| 74 row+=columns; |
| 75 for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { |
| 76 row[0]=cp; |
| 77 row[1]=cp+1; |
| 78 row+=columns; |
| 79 } |
| 80 return pv; |
| 81 } |
| 82 |
| 83 U_CAPI void U_EXPORT2 |
| 84 upvec_close(UPropsVectors *pv) { |
| 85 if(pv!=NULL) { |
| 86 uprv_free(pv->v); |
| 87 uprv_free(pv); |
| 88 } |
| 89 } |
| 90 |
| 91 static uint32_t * |
| 92 _findRow(UPropsVectors *pv, UChar32 rangeStart) { |
| 93 uint32_t *row; |
| 94 int32_t columns, i, start, limit, prevRow, rows; |
| 95 |
| 96 columns=pv->columns; |
| 97 rows=limit=pv->rows; |
| 98 prevRow=pv->prevRow; |
| 99 |
| 100 /* check the vicinity of the last-seen row (start searching with an unrolled
loop) */ |
| 101 row=pv->v+prevRow*columns; |
| 102 if(rangeStart>=(UChar32)row[0]) { |
| 103 if(rangeStart<(UChar32)row[1]) { |
| 104 /* same row as last seen */ |
| 105 return row; |
| 106 } else if(rangeStart<(UChar32)(row+=columns)[1]) { |
| 107 /* next row after the last one */ |
| 108 pv->prevRow=prevRow+1; |
| 109 return row; |
| 110 } else if(rangeStart<(UChar32)(row+=columns)[1]) { |
| 111 /* second row after the last one */ |
| 112 pv->prevRow=prevRow+2; |
| 113 return row; |
| 114 } else if((rangeStart-(UChar32)row[1])<10) { |
| 115 /* we are close, continue looping */ |
| 116 prevRow+=2; |
| 117 do { |
| 118 ++prevRow; |
| 119 row+=columns; |
| 120 } while(rangeStart>=(UChar32)row[1]); |
| 121 pv->prevRow=prevRow; |
| 122 return row; |
| 123 } |
| 124 } else if(rangeStart<(UChar32)pv->v[1]) { |
| 125 /* the very first row */ |
| 126 pv->prevRow=0; |
| 127 return pv->v; |
| 128 } |
| 129 |
| 130 /* do a binary search for the start of the range */ |
| 131 start=0; |
| 132 while(start<limit-1) { |
| 133 i=(start+limit)/2; |
| 134 row=pv->v+i*columns; |
| 135 if(rangeStart<(UChar32)row[0]) { |
| 136 limit=i; |
| 137 } else if(rangeStart<(UChar32)row[1]) { |
| 138 pv->prevRow=i; |
| 139 return row; |
| 140 } else { |
| 141 start=i; |
| 142 } |
| 143 } |
| 144 |
| 145 /* must be found because all ranges together always cover all of Unicode */ |
| 146 pv->prevRow=start; |
| 147 return pv->v+start*columns; |
| 148 } |
| 149 |
| 150 U_CAPI void U_EXPORT2 |
| 151 upvec_setValue(UPropsVectors *pv, |
| 152 UChar32 start, UChar32 end, |
| 153 int32_t column, |
| 154 uint32_t value, uint32_t mask, |
| 155 UErrorCode *pErrorCode) { |
| 156 uint32_t *firstRow, *lastRow; |
| 157 int32_t columns; |
| 158 UChar32 limit; |
| 159 UBool splitFirstRow, splitLastRow; |
| 160 |
| 161 /* argument checking */ |
| 162 if(U_FAILURE(*pErrorCode)) { |
| 163 return; |
| 164 } |
| 165 if( pv==NULL || |
| 166 start<0 || start>end || end>UPVEC_MAX_CP || |
| 167 column<0 || column>=(pv->columns-2) |
| 168 ) { |
| 169 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 170 return; |
| 171 } |
| 172 if(pv->isCompacted) { |
| 173 *pErrorCode=U_NO_WRITE_PERMISSION; |
| 174 return; |
| 175 } |
| 176 limit=end+1; |
| 177 |
| 178 /* initialize */ |
| 179 columns=pv->columns; |
| 180 column+=2; /* skip range start and limit columns */ |
| 181 value&=mask; |
| 182 |
| 183 /* find the rows whose ranges overlap with the input range */ |
| 184 |
| 185 /* find the first and last rows, always successful */ |
| 186 firstRow=_findRow(pv, start); |
| 187 lastRow=_findRow(pv, end); |
| 188 |
| 189 /* |
| 190 * Rows need to be split if they partially overlap with the |
| 191 * input range (only possible for the first and last rows) |
| 192 * and if their value differs from the input value. |
| 193 */ |
| 194 splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[colum
n]&mask)); |
| 195 splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&
mask)); |
| 196 |
| 197 /* split first/last rows if necessary */ |
| 198 if(splitFirstRow || splitLastRow) { |
| 199 int32_t count, rows; |
| 200 |
| 201 rows=pv->rows; |
| 202 if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { |
| 203 uint32_t *newVectors; |
| 204 int32_t newMaxRows; |
| 205 |
| 206 if(pv->maxRows<UPVEC_MEDIUM_ROWS) { |
| 207 newMaxRows=UPVEC_MEDIUM_ROWS; |
| 208 } else if(pv->maxRows<UPVEC_MAX_ROWS) { |
| 209 newMaxRows=UPVEC_MAX_ROWS; |
| 210 } else { |
| 211 /* Implementation bug, or UPVEC_MAX_ROWS too low. */ |
| 212 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 213 return; |
| 214 } |
| 215 newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); |
| 216 if(newVectors==NULL) { |
| 217 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 218 return; |
| 219 } |
| 220 uprv_memcpy(newVectors, pv->v, rows*columns*4); |
| 221 firstRow=newVectors+(firstRow-pv->v); |
| 222 lastRow=newVectors+(lastRow-pv->v); |
| 223 uprv_free(pv->v); |
| 224 pv->v=newVectors; |
| 225 pv->maxRows=newMaxRows; |
| 226 } |
| 227 |
| 228 /* count the number of row cells to move after the last row, and move th
em */ |
| 229 count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); |
| 230 if(count>0) { |
| 231 uprv_memmove( |
| 232 lastRow+(1+splitFirstRow+splitLastRow)*columns, |
| 233 lastRow+columns, |
| 234 count*4); |
| 235 } |
| 236 pv->rows=rows+splitFirstRow+splitLastRow; |
| 237 |
| 238 /* split the first row, and move the firstRow pointer to the second part
*/ |
| 239 if(splitFirstRow) { |
| 240 /* copy all affected rows up one and move the lastRow pointer */ |
| 241 count = (int32_t)((lastRow-firstRow)+columns); |
| 242 uprv_memmove(firstRow+columns, firstRow, count*4); |
| 243 lastRow+=columns; |
| 244 |
| 245 /* split the range and move the firstRow pointer */ |
| 246 firstRow[1]=firstRow[columns]=(uint32_t)start; |
| 247 firstRow+=columns; |
| 248 } |
| 249 |
| 250 /* split the last row */ |
| 251 if(splitLastRow) { |
| 252 /* copy the last row data */ |
| 253 uprv_memcpy(lastRow+columns, lastRow, columns*4); |
| 254 |
| 255 /* split the range and move the firstRow pointer */ |
| 256 lastRow[1]=lastRow[columns]=(uint32_t)limit; |
| 257 } |
| 258 } |
| 259 |
| 260 /* set the "row last seen" to the last row for the range */ |
| 261 pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); |
| 262 |
| 263 /* set the input value in all remaining rows */ |
| 264 firstRow+=column; |
| 265 lastRow+=column; |
| 266 mask=~mask; |
| 267 for(;;) { |
| 268 *firstRow=(*firstRow&mask)|value; |
| 269 if(firstRow==lastRow) { |
| 270 break; |
| 271 } |
| 272 firstRow+=columns; |
| 273 } |
| 274 } |
| 275 |
| 276 U_CAPI uint32_t U_EXPORT2 |
| 277 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { |
| 278 uint32_t *row; |
| 279 UPropsVectors *ncpv; |
| 280 |
| 281 if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->colu
mns-2)) { |
| 282 return 0; |
| 283 } |
| 284 ncpv=(UPropsVectors *)pv; |
| 285 row=_findRow(ncpv, c); |
| 286 return row[2+column]; |
| 287 } |
| 288 |
| 289 U_CAPI uint32_t * U_EXPORT2 |
| 290 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, |
| 291 UChar32 *pRangeStart, UChar32 *pRangeEnd) { |
| 292 uint32_t *row; |
| 293 int32_t columns; |
| 294 |
| 295 if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { |
| 296 return NULL; |
| 297 } |
| 298 |
| 299 columns=pv->columns; |
| 300 row=pv->v+rowIndex*columns; |
| 301 if(pRangeStart!=NULL) { |
| 302 *pRangeStart=(UChar32)row[0]; |
| 303 } |
| 304 if(pRangeEnd!=NULL) { |
| 305 *pRangeEnd=(UChar32)row[1]-1; |
| 306 } |
| 307 return row+2; |
| 308 } |
| 309 |
| 310 static int32_t U_CALLCONV |
| 311 upvec_compareRows(const void *context, const void *l, const void *r) { |
| 312 const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; |
| 313 const UPropsVectors *pv=(const UPropsVectors *)context; |
| 314 int32_t i, count, columns; |
| 315 |
| 316 count=columns=pv->columns; /* includes start/limit columns */ |
| 317 |
| 318 /* start comparing after start/limit but wrap around to them */ |
| 319 i=2; |
| 320 do { |
| 321 if(left[i]!=right[i]) { |
| 322 return left[i]<right[i] ? -1 : 1; |
| 323 } |
| 324 if(++i==columns) { |
| 325 i=0; |
| 326 } |
| 327 } while(--count>0); |
| 328 |
| 329 return 0; |
| 330 } |
| 331 |
| 332 U_CAPI void U_EXPORT2 |
| 333 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UE
rrorCode *pErrorCode) { |
| 334 uint32_t *row; |
| 335 int32_t i, columns, valueColumns, rows, count; |
| 336 UChar32 start, limit; |
| 337 |
| 338 /* argument checking */ |
| 339 if(U_FAILURE(*pErrorCode)) { |
| 340 return; |
| 341 } |
| 342 if(handler==NULL) { |
| 343 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 344 return; |
| 345 } |
| 346 if(pv->isCompacted) { |
| 347 return; |
| 348 } |
| 349 |
| 350 /* Set the flag now: Sorting and compacting destroys the builder data struct
ure. */ |
| 351 pv->isCompacted=TRUE; |
| 352 |
| 353 rows=pv->rows; |
| 354 columns=pv->columns; |
| 355 valueColumns=columns-2; /* not counting start & limit */ |
| 356 |
| 357 /* sort the properties vectors to find unique vector values */ |
| 358 uprv_sortArray(pv->v, rows, columns*4, |
| 359 upvec_compareRows, pv, FALSE, pErrorCode); |
| 360 if(U_FAILURE(*pErrorCode)) { |
| 361 return; |
| 362 } |
| 363 |
| 364 /* |
| 365 * Find and set the special values. |
| 366 * This has to do almost the same work as the compaction below, |
| 367 * to find the indexes where the special-value rows will move. |
| 368 */ |
| 369 row=pv->v; |
| 370 count=-valueColumns; |
| 371 for(i=0; i<rows; ++i) { |
| 372 start=(UChar32)row[0]; |
| 373 |
| 374 /* count a new values vector if it is different from the current one */ |
| 375 if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { |
| 376 count+=valueColumns; |
| 377 } |
| 378 |
| 379 if(start>=UPVEC_FIRST_SPECIAL_CP) { |
| 380 handler(context, start, start, count, row+2, valueColumns, pErrorCod
e); |
| 381 if(U_FAILURE(*pErrorCode)) { |
| 382 return; |
| 383 } |
| 384 } |
| 385 |
| 386 row+=columns; |
| 387 } |
| 388 |
| 389 /* count is at the beginning of the last vector, add valueColumns to include
that last vector */ |
| 390 count+=valueColumns; |
| 391 |
| 392 /* Call the handler once more to signal the start of delivering real values.
*/ |
| 393 handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, |
| 394 count, row-valueColumns, valueColumns, pErrorCode); |
| 395 if(U_FAILURE(*pErrorCode)) { |
| 396 return; |
| 397 } |
| 398 |
| 399 /* |
| 400 * Move vector contents up to a contiguous array with only unique |
| 401 * vector values, and call the handler function for each vector. |
| 402 * |
| 403 * This destroys the Properties Vector structure and replaces it |
| 404 * with an array of just vector values. |
| 405 */ |
| 406 row=pv->v; |
| 407 count=-valueColumns; |
| 408 for(i=0; i<rows; ++i) { |
| 409 /* fetch these first before memmove() may overwrite them */ |
| 410 start=(UChar32)row[0]; |
| 411 limit=(UChar32)row[1]; |
| 412 |
| 413 /* add a new values vector if it is different from the current one */ |
| 414 if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { |
| 415 count+=valueColumns; |
| 416 uprv_memmove(pv->v+count, row+2, valueColumns*4); |
| 417 } |
| 418 |
| 419 if(start<UPVEC_FIRST_SPECIAL_CP) { |
| 420 handler(context, start, limit-1, count, pv->v+count, valueColumns, p
ErrorCode); |
| 421 if(U_FAILURE(*pErrorCode)) { |
| 422 return; |
| 423 } |
| 424 } |
| 425 |
| 426 row+=columns; |
| 427 } |
| 428 |
| 429 /* count is at the beginning of the last vector, add one to include that las
t vector */ |
| 430 pv->rows=count/valueColumns+1; |
| 431 } |
| 432 |
| 433 U_CAPI const uint32_t * U_EXPORT2 |
| 434 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { |
| 435 if(!pv->isCompacted) { |
| 436 return NULL; |
| 437 } |
| 438 if(pRows!=NULL) { |
| 439 *pRows=pv->rows; |
| 440 } |
| 441 if(pColumns!=NULL) { |
| 442 *pColumns=pv->columns-2; |
| 443 } |
| 444 return pv->v; |
| 445 } |
| 446 |
| 447 U_CAPI uint32_t * U_EXPORT2 |
| 448 upvec_cloneArray(const UPropsVectors *pv, |
| 449 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { |
| 450 uint32_t *clonedArray; |
| 451 int32_t byteLength; |
| 452 |
| 453 if(U_FAILURE(*pErrorCode)) { |
| 454 return NULL; |
| 455 } |
| 456 if(!pv->isCompacted) { |
| 457 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 458 return NULL; |
| 459 } |
| 460 byteLength=pv->rows*(pv->columns-2)*4; |
| 461 clonedArray=(uint32_t *)uprv_malloc(byteLength); |
| 462 if(clonedArray==NULL) { |
| 463 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 464 return NULL; |
| 465 } |
| 466 uprv_memcpy(clonedArray, pv->v, byteLength); |
| 467 if(pRows!=NULL) { |
| 468 *pRows=pv->rows; |
| 469 } |
| 470 if(pColumns!=NULL) { |
| 471 *pColumns=pv->columns-2; |
| 472 } |
| 473 return clonedArray; |
| 474 } |
| 475 |
| 476 U_CAPI UTrie2 * U_EXPORT2 |
| 477 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { |
| 478 UPVecToUTrie2Context toUTrie2={ NULL }; |
| 479 upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); |
| 480 utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); |
| 481 if(U_FAILURE(*pErrorCode)) { |
| 482 utrie2_close(toUTrie2.trie); |
| 483 toUTrie2.trie=NULL; |
| 484 } |
| 485 return toUTrie2.trie; |
| 486 } |
| 487 |
| 488 /* |
| 489 * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, e
xtracts |
| 490 * some 16-bit field and builds and returns a UTrie2. |
| 491 */ |
| 492 |
| 493 U_CAPI void U_CALLCONV |
| 494 upvec_compactToUTrie2Handler(void *context, |
| 495 UChar32 start, UChar32 end, |
| 496 int32_t rowIndex, uint32_t *row, int32_t columns, |
| 497 UErrorCode *pErrorCode) { |
| 498 UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; |
| 499 if(start<UPVEC_FIRST_SPECIAL_CP) { |
| 500 utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE,
pErrorCode); |
| 501 } else { |
| 502 switch(start) { |
| 503 case UPVEC_INITIAL_VALUE_CP: |
| 504 toUTrie2->initialValue=rowIndex; |
| 505 break; |
| 506 case UPVEC_ERROR_VALUE_CP: |
| 507 toUTrie2->errorValue=rowIndex; |
| 508 break; |
| 509 case UPVEC_START_REAL_VALUES_CP: |
| 510 toUTrie2->maxValue=rowIndex; |
| 511 if(rowIndex>0xffff) { |
| 512 /* too many rows for a 16-bit trie */ |
| 513 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 514 } else { |
| 515 toUTrie2->trie=utrie2_open(toUTrie2->initialValue, |
| 516 toUTrie2->errorValue, pErrorCode); |
| 517 } |
| 518 break; |
| 519 default: |
| 520 break; |
| 521 } |
| 522 } |
| 523 } |
OLD | NEW |