| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 ******************************************************************************* | 2 ******************************************************************************* | 
| 3 *   Copyright (C) 1996-2013, International Business Machines | 3 *   Copyright (C) 1996-2014, International Business Machines | 
| 4 *   Corporation and others.  All Rights Reserved. | 4 *   Corporation and others.  All Rights Reserved. | 
| 5 ******************************************************************************* | 5 ******************************************************************************* | 
| 6 *   file name:  ucol.cpp | 6 *   file name:  ucol.cpp | 
| 7 *   encoding:   US-ASCII | 7 *   encoding:   US-ASCII | 
| 8 *   tab size:   8 (not used) | 8 *   tab size:   8 (not used) | 
| 9 *   indentation:4 | 9 *   indentation:4 | 
| 10 * | 10 * | 
| 11 * Modification history | 11 * Modification history | 
| 12 * Date        Name      Comments | 12 * Date        Name      Comments | 
| 13 * 1996-1999   various members of ICU team maintained C API for collation framework | 13 * 1996-1999   various members of ICU team maintained C API for collation framework | 
| 14 * 02/16/2001  synwee    Added internal method getPrevSpecialCE | 14 * 02/16/2001  synwee    Added internal method getPrevSpecialCE | 
| 15 * 03/01/2001  synwee    Added maxexpansion functionality. | 15 * 03/01/2001  synwee    Added maxexpansion functionality. | 
| 16 * 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant | 16 * 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant | 
|  | 17 * 2012-2014   markus    Rewritten in C++ again. | 
| 17 */ | 18 */ | 
| 18 | 19 | 
| 19 #include "unicode/utypes.h" | 20 #include "unicode/utypes.h" | 
| 20 | 21 | 
| 21 #if !UCONFIG_NO_COLLATION | 22 #if !UCONFIG_NO_COLLATION | 
| 22 | 23 | 
|  | 24 #include "unicode/coll.h" | 
|  | 25 #include "unicode/tblcoll.h" | 
| 23 #include "unicode/bytestream.h" | 26 #include "unicode/bytestream.h" | 
| 24 #include "unicode/coleitr.h" | 27 #include "unicode/coleitr.h" | 
| 25 #include "unicode/unorm.h" | 28 #include "unicode/ucoleitr.h" | 
| 26 #include "unicode/udata.h" |  | 
| 27 #include "unicode/ustring.h" | 29 #include "unicode/ustring.h" | 
| 28 #include "unicode/utf8.h" |  | 
| 29 |  | 
| 30 #include "ucol_imp.h" |  | 
| 31 #include "bocsu.h" |  | 
| 32 |  | 
| 33 #include "normalizer2impl.h" |  | 
| 34 #include "unorm_it.h" |  | 
| 35 #include "umutex.h" |  | 
| 36 #include "cmemory.h" | 30 #include "cmemory.h" | 
| 37 #include "ucln_in.h" | 31 #include "collation.h" | 
| 38 #include "cstring.h" | 32 #include "cstring.h" | 
| 39 #include "utracimp.h" |  | 
| 40 #include "putilimp.h" | 33 #include "putilimp.h" | 
| 41 #include "uassert.h" | 34 #include "uassert.h" | 
| 42 #include "unicode/coll.h" | 35 #include "utracimp.h" | 
| 43 |  | 
| 44 #ifdef UCOL_DEBUG |  | 
| 45 #include <stdio.h> |  | 
| 46 #endif |  | 
| 47 | 36 | 
| 48 U_NAMESPACE_USE | 37 U_NAMESPACE_USE | 
| 49 | 38 | 
| 50 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |  | 
| 51 |  | 
| 52 #define LAST_BYTE_MASK_           0xFF |  | 
| 53 #define SECOND_LAST_BYTE_SHIFT_   8 |  | 
| 54 |  | 
| 55 #define ZERO_CC_LIMIT_            0xC0 |  | 
| 56 |  | 
| 57 // These are static pointers to the NFC/NFD implementation instance. |  | 
| 58 // Each of them is always the same between calls to u_cleanup |  | 
| 59 // and therefore writing to it is not synchronized. |  | 
| 60 // They are cleaned in ucol_cleanup |  | 
| 61 static const Normalizer2 *g_nfd = NULL; |  | 
| 62 static const Normalizer2Impl *g_nfcImpl = NULL; |  | 
| 63 |  | 
| 64 // These are values from UCA required for |  | 
| 65 // implicit generation and supressing sort key compression |  | 
| 66 // they should regularly be in the UCA, but if one |  | 
| 67 // is running without UCA, it could be a problem |  | 
| 68 static const int32_t maxRegularPrimary  = 0x7A; |  | 
| 69 static const int32_t minImplicitPrimary = 0xE0; |  | 
| 70 static const int32_t maxImplicitPrimary = 0xE4; |  | 
| 71 |  | 
| 72 U_CDECL_BEGIN |  | 
| 73 static UBool U_CALLCONV |  | 
| 74 ucol_cleanup(void) |  | 
| 75 { |  | 
| 76     g_nfd = NULL; |  | 
| 77     g_nfcImpl = NULL; |  | 
| 78     return TRUE; |  | 
| 79 } |  | 
| 80 |  | 
| 81 static int32_t U_CALLCONV |  | 
| 82 _getFoldingOffset(uint32_t data) { |  | 
| 83     return (int32_t)(data&0xFFFFFF); |  | 
| 84 } |  | 
| 85 |  | 
| 86 U_CDECL_END |  | 
| 87 |  | 
| 88 static inline |  | 
| 89 UBool initializeNFD(UErrorCode *status) { |  | 
| 90     if (g_nfd != NULL) { |  | 
| 91         return TRUE; |  | 
| 92     } else { |  | 
| 93         // The result is constant, until the library is reloaded. |  | 
| 94         g_nfd = Normalizer2Factory::getNFDInstance(*status); |  | 
| 95         ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup); |  | 
| 96         return U_SUCCESS(*status); |  | 
| 97     } |  | 
| 98 } |  | 
| 99 |  | 
| 100 // init FCD data |  | 
| 101 static inline |  | 
| 102 UBool initializeFCD(UErrorCode *status) { |  | 
| 103     if (g_nfcImpl != NULL) { |  | 
| 104         return TRUE; |  | 
| 105     } else { |  | 
| 106         // The result is constant, until the library is reloaded. |  | 
| 107         g_nfcImpl = Normalizer2Factory::getNFCImpl(*status); |  | 
| 108         // Note: Alternatively, we could also store this pointer in each collIterate struct, |  | 
| 109         // same as Normalizer2Factory::getImpl(collIterate->nfd). |  | 
| 110         ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup); |  | 
| 111         return U_SUCCESS(*status); |  | 
| 112     } |  | 
| 113 } |  | 
| 114 |  | 
| 115 static |  | 
| 116 inline void IInit_collIterate(const UCollator *collator, const UChar *sourceString, |  | 
| 117                               int32_t sourceLen, collIterate *s, |  | 
| 118                               UErrorCode *status) |  | 
| 119 { |  | 
| 120     (s)->string = (s)->pos = sourceString; |  | 
| 121     (s)->origFlags = 0; |  | 
| 122     (s)->flags = 0; |  | 
| 123     if (sourceLen >= 0) { |  | 
| 124         s->flags |= UCOL_ITER_HASLEN; |  | 
| 125         (s)->endp = (UChar *)sourceString+sourceLen; |  | 
| 126     } |  | 
| 127     else { |  | 
| 128         /* change to enable easier checking for end of string for fcdpositon */ |  | 
| 129         (s)->endp = NULL; |  | 
| 130     } |  | 
| 131     (s)->extendCEs = NULL; |  | 
| 132     (s)->extendCEsSize = 0; |  | 
| 133     (s)->CEpos = (s)->toReturn = (s)->CEs; |  | 
| 134     (s)->offsetBuffer = NULL; |  | 
| 135     (s)->offsetBufferSize = 0; |  | 
| 136     (s)->offsetReturn = (s)->offsetStore = NULL; |  | 
| 137     (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0; |  | 
| 138     (s)->coll = (collator); |  | 
| 139     if (initializeNFD(status)) { |  | 
| 140         (s)->nfd = g_nfd; |  | 
| 141     } else { |  | 
| 142         return; |  | 
| 143     } |  | 
| 144     (s)->fcdPosition = 0; |  | 
| 145     if(collator->normalizationMode == UCOL_ON) { |  | 
| 146         (s)->flags |= UCOL_ITER_NORM; |  | 
| 147     } |  | 
| 148     if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) { |  | 
| 149         (s)->flags |= UCOL_HIRAGANA_Q; |  | 
| 150     } |  | 
| 151     (s)->iterator = NULL; |  | 
| 152     //(s)->iteratorIndex = 0; |  | 
| 153 } |  | 
| 154 |  | 
| 155 U_CAPI void  U_EXPORT2 |  | 
| 156 uprv_init_collIterate(const UCollator *collator, const UChar *sourceString, |  | 
| 157                              int32_t sourceLen, collIterate *s, |  | 
| 158                              UErrorCode *status) { |  | 
| 159     /* Out-of-line version for use from other files. */ |  | 
| 160     IInit_collIterate(collator, sourceString, sourceLen, s, status); |  | 
| 161 } |  | 
| 162 |  | 
| 163 U_CAPI collIterate * U_EXPORT2 |  | 
| 164 uprv_new_collIterate(UErrorCode *status) { |  | 
| 165     if(U_FAILURE(*status)) { |  | 
| 166         return NULL; |  | 
| 167     } |  | 
| 168     collIterate *s = new collIterate; |  | 
| 169     if(s == NULL) { |  | 
| 170         *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 171         return NULL; |  | 
| 172     } |  | 
| 173     return s; |  | 
| 174 } |  | 
| 175 |  | 
| 176 U_CAPI void U_EXPORT2 |  | 
| 177 uprv_delete_collIterate(collIterate *s) { |  | 
| 178     delete s; |  | 
| 179 } |  | 
| 180 |  | 
| 181 U_CAPI UBool U_EXPORT2 |  | 
| 182 uprv_collIterateAtEnd(collIterate *s) { |  | 
| 183     return s == NULL || s->pos == s->endp; |  | 
| 184 } |  | 
| 185 |  | 
| 186 /** |  | 
| 187 * Backup the state of the collIterate struct data |  | 
| 188 * @param data collIterate to backup |  | 
| 189 * @param backup storage |  | 
| 190 */ |  | 
| 191 static |  | 
| 192 inline void backupState(const collIterate *data, collIterateState *backup) |  | 
| 193 { |  | 
| 194     backup->fcdPosition = data->fcdPosition; |  | 
| 195     backup->flags       = data->flags; |  | 
| 196     backup->origFlags   = data->origFlags; |  | 
| 197     backup->pos         = data->pos; |  | 
| 198     backup->bufferaddress = data->writableBuffer.getBuffer(); |  | 
| 199     backup->buffersize    = data->writableBuffer.length(); |  | 
| 200     backup->iteratorMove = 0; |  | 
| 201     backup->iteratorIndex = 0; |  | 
| 202     if(data->iterator != NULL) { |  | 
| 203         //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER_CURRENT); |  | 
| 204         backup->iteratorIndex = data->iterator->getState(data->iterator); |  | 
| 205         // no we try to fixup if we're using a normalizing iterator and we get UITER_NO_STATE |  | 
| 206         if(backup->iteratorIndex == UITER_NO_STATE) { |  | 
| 207             while((backup->iteratorIndex = data->iterator->getState(data->iterator)) == UITER_NO_STATE) { |  | 
| 208                 backup->iteratorMove++; |  | 
| 209                 data->iterator->move(data->iterator, -1, UITER_CURRENT); |  | 
| 210             } |  | 
| 211             data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT); |  | 
| 212         } |  | 
| 213     } |  | 
| 214 } |  | 
| 215 |  | 
| 216 /** |  | 
| 217 * Loads the state into the collIterate struct data |  | 
| 218 * @param data collIterate to backup |  | 
| 219 * @param backup storage |  | 
| 220 * @param forwards boolean to indicate if forwards iteration is used, |  | 
| 221 *        false indicates backwards iteration |  | 
| 222 */ |  | 
| 223 static |  | 
| 224 inline void loadState(collIterate *data, const collIterateState *backup, |  | 
| 225                       UBool        forwards) |  | 
| 226 { |  | 
| 227     UErrorCode status = U_ZERO_ERROR; |  | 
| 228     data->flags       = backup->flags; |  | 
| 229     data->origFlags   = backup->origFlags; |  | 
| 230     if(data->iterator != NULL) { |  | 
| 231         //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO); |  | 
| 232         data->iterator->setState(data->iterator, backup->iteratorIndex, &status); |  | 
| 233         if(backup->iteratorMove != 0) { |  | 
| 234             data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT); |  | 
| 235         } |  | 
| 236     } |  | 
| 237     data->pos         = backup->pos; |  | 
| 238 |  | 
| 239     if ((data->flags & UCOL_ITER_INNORMBUF) && |  | 
| 240         data->writableBuffer.getBuffer() != backup->bufferaddress) { |  | 
| 241         /* |  | 
| 242         this is when a new buffer has been reallocated and we'll have to |  | 
| 243         calculate the new position. |  | 
| 244         note the new buffer has to contain the contents of the old buffer. |  | 
| 245         */ |  | 
| 246         if (forwards) { |  | 
| 247             data->pos = data->writableBuffer.getTerminatedBuffer() + |  | 
| 248                                          (data->pos - backup->bufferaddress); |  | 
| 249         } |  | 
| 250         else { |  | 
| 251             /* backwards direction */ |  | 
| 252             int32_t temp = backup->buffersize - |  | 
| 253                                   (int32_t)(data->pos - backup->bufferaddress); |  | 
| 254             data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp); |  | 
| 255         } |  | 
| 256     } |  | 
| 257     if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { |  | 
| 258         /* |  | 
| 259         this is alittle tricky. |  | 
| 260         if we are initially not in the normalization buffer, even if we |  | 
| 261         normalize in the later stage, the data in the buffer will be |  | 
| 262         ignored, since we skip back up to the data string. |  | 
| 263         however if we are already in the normalization buffer, any |  | 
| 264         further normalization will pull data into the normalization |  | 
| 265         buffer and modify the fcdPosition. |  | 
| 266         since we are keeping the data in the buffer for use, the |  | 
| 267         fcdPosition can not be reverted back. |  | 
| 268         arrgghh.... |  | 
| 269         */ |  | 
| 270         data->fcdPosition = backup->fcdPosition; |  | 
| 271     } |  | 
| 272 } |  | 
| 273 |  | 
| 274 static UBool |  | 
| 275 reallocCEs(collIterate *data, int32_t newCapacity) { |  | 
| 276     uint32_t *oldCEs = data->extendCEs; |  | 
| 277     if(oldCEs == NULL) { |  | 
| 278         oldCEs = data->CEs; |  | 
| 279     } |  | 
| 280     int32_t length = data->CEpos - oldCEs; |  | 
| 281     uint32_t *newCEs = (uint32_t *)uprv_malloc(newCapacity * 4); |  | 
| 282     if(newCEs == NULL) { |  | 
| 283         return FALSE; |  | 
| 284     } |  | 
| 285     uprv_memcpy(newCEs, oldCEs, length * 4); |  | 
| 286     uprv_free(data->extendCEs); |  | 
| 287     data->extendCEs = newCEs; |  | 
| 288     data->extendCEsSize = newCapacity; |  | 
| 289     data->CEpos = newCEs + length; |  | 
| 290     return TRUE; |  | 
| 291 } |  | 
| 292 |  | 
| 293 static UBool |  | 
| 294 increaseCEsCapacity(collIterate *data) { |  | 
| 295     int32_t oldCapacity; |  | 
| 296     if(data->extendCEs != NULL) { |  | 
| 297         oldCapacity = data->extendCEsSize; |  | 
| 298     } else { |  | 
| 299         oldCapacity = LENGTHOF(data->CEs); |  | 
| 300     } |  | 
| 301     return reallocCEs(data, 2 * oldCapacity); |  | 
| 302 } |  | 
| 303 |  | 
| 304 static UBool |  | 
| 305 ensureCEsCapacity(collIterate *data, int32_t minCapacity) { |  | 
| 306     int32_t oldCapacity; |  | 
| 307     if(data->extendCEs != NULL) { |  | 
| 308         oldCapacity = data->extendCEsSize; |  | 
| 309     } else { |  | 
| 310         oldCapacity = LENGTHOF(data->CEs); |  | 
| 311     } |  | 
| 312     if(minCapacity <= oldCapacity) { |  | 
| 313         return TRUE; |  | 
| 314     } |  | 
| 315     oldCapacity *= 2; |  | 
| 316     return reallocCEs(data, minCapacity > oldCapacity ? minCapacity : oldCapacity); |  | 
| 317 } |  | 
| 318 |  | 
| 319 void collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) { |  | 
| 320     if(U_FAILURE(errorCode)) { |  | 
| 321         return; |  | 
| 322     } |  | 
| 323     int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuffer); |  | 
| 324     U_ASSERT(length >= offsetBufferSize || offsetStore != NULL); |  | 
| 325     if(length >= offsetBufferSize) { |  | 
| 326         int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE; |  | 
| 327         int32_t *newBuffer = static_cast<int32_t *>(uprv_malloc(newCapacity * 4)); |  | 
| 328         if(newBuffer == NULL) { |  | 
| 329             errorCode = U_MEMORY_ALLOCATION_ERROR; |  | 
| 330             return; |  | 
| 331         } |  | 
| 332         if(length > 0) { |  | 
| 333             uprv_memcpy(newBuffer, offsetBuffer, length * 4); |  | 
| 334         } |  | 
| 335         uprv_free(offsetBuffer); |  | 
| 336         offsetBuffer = newBuffer; |  | 
| 337         offsetStore = offsetBuffer + length; |  | 
| 338         offsetBufferSize = newCapacity; |  | 
| 339     } |  | 
| 340     *offsetStore++ = offset; |  | 
| 341 } |  | 
| 342 |  | 
| 343 /* |  | 
| 344 * collIter_eos() |  | 
| 345 *     Checks for a collIterate being positioned at the end of |  | 
| 346 *     its source string. |  | 
| 347 * |  | 
| 348 */ |  | 
| 349 static |  | 
| 350 inline UBool collIter_eos(collIterate *s) { |  | 
| 351     if(s->flags & UCOL_USE_ITERATOR) { |  | 
| 352       return !(s->iterator->hasNext(s->iterator)); |  | 
| 353     } |  | 
| 354     if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) { |  | 
| 355         // Null terminated string, but not at null, so not at end. |  | 
| 356         //   Whether in main or normalization buffer doesn't matter. |  | 
| 357         return FALSE; |  | 
| 358     } |  | 
| 359 |  | 
| 360     // String with length.  Can't be in normalization buffer, which is always |  | 
| 361     //  null termintated. |  | 
| 362     if (s->flags & UCOL_ITER_HASLEN) { |  | 
| 363         return (s->pos == s->endp); |  | 
| 364     } |  | 
| 365 |  | 
| 366     // We are at a null termination, could be either normalization buffer or main string. |  | 
| 367     if ((s->flags & UCOL_ITER_INNORMBUF) == 0) { |  | 
| 368         // At null at end of main string. |  | 
| 369         return TRUE; |  | 
| 370     } |  | 
| 371 |  | 
| 372     // At null at end of normalization buffer.  Need to check whether there there are |  | 
| 373     //   any characters left in the main buffer. |  | 
| 374     if(s->origFlags & UCOL_USE_ITERATOR) { |  | 
| 375       return !(s->iterator->hasNext(s->iterator)); |  | 
| 376     } else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) { |  | 
| 377         // Null terminated main string.  fcdPosition is the 'return' position into main buf. |  | 
| 378         return (*s->fcdPosition == 0); |  | 
| 379     } |  | 
| 380     else { |  | 
| 381         // Main string with an end pointer. |  | 
| 382         return s->fcdPosition == s->endp; |  | 
| 383     } |  | 
| 384 } |  | 
| 385 |  | 
| 386 /* |  | 
| 387 * collIter_bos() |  | 
| 388 *     Checks for a collIterate being positioned at the start of |  | 
| 389 *     its source string. |  | 
| 390 * |  | 
| 391 */ |  | 
| 392 static |  | 
| 393 inline UBool collIter_bos(collIterate *source) { |  | 
| 394   // if we're going backwards, we need to know whether there is more in the |  | 
| 395   // iterator, even if we are in the side buffer |  | 
| 396   if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) { |  | 
| 397     return !source->iterator->hasPrevious(source->iterator); |  | 
| 398   } |  | 
| 399   if (source->pos <= source->string || |  | 
| 400       ((source->flags & UCOL_ITER_INNORMBUF) && |  | 
| 401       *(source->pos - 1) == 0 && source->fcdPosition == NULL)) { |  | 
| 402     return TRUE; |  | 
| 403   } |  | 
| 404   return FALSE; |  | 
| 405 } |  | 
| 406 |  | 
| 407 /*static |  | 
| 408 inline UBool collIter_SimpleBos(collIterate *source) { |  | 
| 409   // if we're going backwards, we need to know whether there is more in the |  | 
| 410   // iterator, even if we are in the side buffer |  | 
| 411   if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) { |  | 
| 412     return !source->iterator->hasPrevious(source->iterator); |  | 
| 413   } |  | 
| 414   if (source->pos == source->string) { |  | 
| 415     return TRUE; |  | 
| 416   } |  | 
| 417   return FALSE; |  | 
| 418 }*/ |  | 
| 419     //return (data->pos == data->string) || |  | 
| 420 |  | 
| 421 |  | 
| 422 /****************************************************************************/ |  | 
| 423 /* Following are the open/close functions                                   */ |  | 
| 424 /*                                                                          */ |  | 
| 425 /****************************************************************************/ |  | 
| 426 |  | 
| 427 static UCollator* |  | 
| 428 ucol_initFromBinary(const uint8_t *bin, int32_t length, |  | 
| 429                 const UCollator *base, |  | 
| 430                 UCollator *fillIn, |  | 
| 431                 UErrorCode *status) |  | 
| 432 { |  | 
| 433     UCollator *result = fillIn; |  | 
| 434     if(U_FAILURE(*status)) { |  | 
| 435         return NULL; |  | 
| 436     } |  | 
| 437     /* |  | 
| 438     if(base == NULL) { |  | 
| 439         // we don't support null base yet |  | 
| 440         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 441         return NULL; |  | 
| 442     } |  | 
| 443     */ |  | 
| 444     // We need these and we could be running without UCA |  | 
| 445     uprv_uca_initImplicitConstants(status); |  | 
| 446     UCATableHeader *colData = (UCATableHeader *)bin; |  | 
| 447     // do we want version check here? We're trying to figure out whether collators are compatible |  | 
| 448     if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 || |  | 
| 449         uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) || |  | 
| 450         colData->version[0] != UCOL_BUILDER_VERSION) |  | 
| 451     { |  | 
| 452         *status = U_COLLATOR_VERSION_MISMATCH; |  | 
| 453         return NULL; |  | 
| 454     } |  | 
| 455     else { |  | 
| 456         if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) { |  | 
| 457             result = ucol_initCollator((const UCATableHeader *)bin, result, base, status); |  | 
| 458             if(U_FAILURE(*status)){ |  | 
| 459                 return NULL; |  | 
| 460             } |  | 
| 461             result->hasRealData = TRUE; |  | 
| 462         } |  | 
| 463         else { |  | 
| 464             if(base) { |  | 
| 465                 result = ucol_initCollator(base->image, result, base, status); |  | 
| 466                 ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+((const UCATableHeader *)bin)->options), status); |  | 
| 467                 if(U_FAILURE(*status)){ |  | 
| 468                     return NULL; |  | 
| 469                 } |  | 
| 470                 result->hasRealData = FALSE; |  | 
| 471             } |  | 
| 472             else { |  | 
| 473                 *status = U_USELESS_COLLATOR_ERROR; |  | 
| 474                 return NULL; |  | 
| 475             } |  | 
| 476         } |  | 
| 477         result->freeImageOnClose = FALSE; |  | 
| 478     } |  | 
| 479     result->actualLocale = NULL; |  | 
| 480     result->validLocale = NULL; |  | 
| 481     result->requestedLocale = NULL; |  | 
| 482     result->rules = NULL; |  | 
| 483     result->rulesLength = 0; |  | 
| 484     result->freeRulesOnClose = FALSE; |  | 
| 485     result->ucaRules = NULL; |  | 
| 486     return result; |  | 
| 487 } |  | 
| 488 |  | 
| 489 U_CAPI UCollator* U_EXPORT2 | 39 U_CAPI UCollator* U_EXPORT2 | 
| 490 ucol_openBinary(const uint8_t *bin, int32_t length, | 40 ucol_openBinary(const uint8_t *bin, int32_t length, | 
| 491                 const UCollator *base, | 41                 const UCollator *base, | 
| 492                 UErrorCode *status) | 42                 UErrorCode *status) | 
| 493 { | 43 { | 
| 494     return ucol_initFromBinary(bin, length, base, NULL, status); | 44     if(U_FAILURE(*status)) { return NULL; } | 
|  | 45     RuleBasedCollator *coll = new RuleBasedCollator( | 
|  | 46             bin, length, | 
|  | 47             RuleBasedCollator::rbcFromUCollator(base), | 
|  | 48             *status); | 
|  | 49     if(coll == NULL) { | 
|  | 50         *status = U_MEMORY_ALLOCATION_ERROR; | 
|  | 51         return NULL; | 
|  | 52     } | 
|  | 53     if(U_FAILURE(*status)) { | 
|  | 54         delete coll; | 
|  | 55         return NULL; | 
|  | 56     } | 
|  | 57     return coll->toUCollator(); | 
| 495 } | 58 } | 
| 496 | 59 | 
| 497 U_CAPI int32_t U_EXPORT2 | 60 U_CAPI int32_t U_EXPORT2 | 
| 498 ucol_cloneBinary(const UCollator *coll, | 61 ucol_cloneBinary(const UCollator *coll, | 
| 499                  uint8_t *buffer, int32_t capacity, | 62                  uint8_t *buffer, int32_t capacity, | 
| 500                  UErrorCode *status) | 63                  UErrorCode *status) | 
| 501 { | 64 { | 
| 502     int32_t length = 0; |  | 
| 503     if(U_FAILURE(*status)) { | 65     if(U_FAILURE(*status)) { | 
| 504         return length; | 66         return 0; | 
| 505     } | 67     } | 
| 506     if(capacity < 0) { | 68     const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); | 
| 507         *status = U_ILLEGAL_ARGUMENT_ERROR; | 69     if(rbc == NULL && coll != NULL) { | 
| 508         return length; | 70         *status = U_UNSUPPORTED_ERROR; | 
|  | 71         return 0; | 
| 509     } | 72     } | 
| 510     if(coll->hasRealData == TRUE) { | 73     return rbc->cloneBinary(buffer, capacity, *status); | 
| 511         length = coll->image->size; |  | 
| 512         if(length <= capacity) { |  | 
| 513             uprv_memcpy(buffer, coll->image, length); |  | 
| 514         } else { |  | 
| 515             *status = U_BUFFER_OVERFLOW_ERROR; |  | 
| 516         } |  | 
| 517     } else { |  | 
| 518         length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet))); |  | 
| 519         if(length <= capacity) { |  | 
| 520             /* build the UCATableHeader with minimal entries */ |  | 
| 521             /* do not copy the header from the UCA file because its values are wrong! */ |  | 
| 522             /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */ |  | 
| 523 |  | 
| 524             /* reset everything */ |  | 
| 525             uprv_memset(buffer, 0, length); |  | 
| 526 |  | 
| 527             /* set the tailoring-specific values */ |  | 
| 528             UCATableHeader *myData = (UCATableHeader *)buffer; |  | 
| 529             myData->size = length; |  | 
| 530 |  | 
| 531             /* offset for the options, the only part of the data that is present after the header */ |  | 
| 532             myData->options = sizeof(UCATableHeader); |  | 
| 533 |  | 
| 534             /* need to always set the expansion value for an upper bound of the options */ |  | 
| 535             myData->expansion = myData->options + sizeof(UColOptionSet); |  | 
| 536 |  | 
| 537             myData->magic = UCOL_HEADER_MAGIC; |  | 
| 538             myData->isBigEndian = U_IS_BIG_ENDIAN; |  | 
| 539             myData->charSetFamily = U_CHARSET_FAMILY; |  | 
| 540 |  | 
| 541             /* copy UCA's version; genrb will override all but the builder version with tailoring data */ |  | 
| 542             uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo)); |  | 
| 543 |  | 
| 544             uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo)); |  | 
| 545             uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo)); |  | 
| 546             uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo)); |  | 
| 547             myData->jamoSpecial = coll->image->jamoSpecial; |  | 
| 548 |  | 
| 549             /* copy the collator options */ |  | 
| 550             uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet)); |  | 
| 551         } else { |  | 
| 552             *status = U_BUFFER_OVERFLOW_ERROR; |  | 
| 553         } |  | 
| 554     } |  | 
| 555     return length; |  | 
| 556 } | 74 } | 
| 557 | 75 | 
| 558 U_CAPI UCollator* U_EXPORT2 | 76 U_CAPI UCollator* U_EXPORT2 | 
| 559 ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status) | 77 ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status) | 
| 560 { | 78 { | 
| 561     UCollator * localCollator; |  | 
| 562     int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator); |  | 
| 563     int32_t imageSize = 0; |  | 
| 564     int32_t rulesSize = 0; |  | 
| 565     int32_t rulesPadding = 0; |  | 
| 566     int32_t defaultReorderCodesSize = 0; |  | 
| 567     int32_t reorderCodesSize = 0; |  | 
| 568     uint8_t *image; |  | 
| 569     UChar *rules; |  | 
| 570     int32_t* defaultReorderCodes; |  | 
| 571     int32_t* reorderCodes; |  | 
| 572     uint8_t* leadBytePermutationTable; |  | 
| 573     UBool imageAllocated = FALSE; |  | 
| 574 |  | 
| 575     if (status == NULL || U_FAILURE(*status)){ | 79     if (status == NULL || U_FAILURE(*status)){ | 
| 576         return NULL; | 80         return NULL; | 
| 577     } | 81     } | 
| 578     if (coll == NULL) { | 82     if (coll == NULL) { | 
| 579        *status = U_ILLEGAL_ARGUMENT_ERROR; | 83        *status = U_ILLEGAL_ARGUMENT_ERROR; | 
| 580         return NULL; | 84         return NULL; | 
| 581     } | 85     } | 
| 582 |  | 
| 583     if (coll->rules && coll->freeRulesOnClose) { |  | 
| 584         rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar); |  | 
| 585         rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar)); |  | 
| 586         bufferSizeNeeded += rulesSize + rulesPadding; |  | 
| 587     } |  | 
| 588     // no padding for alignment needed from here since the next two are 4 byte quantities |  | 
| 589     if (coll->defaultReorderCodes) { |  | 
| 590         defaultReorderCodesSize = coll->defaultReorderCodesLength * sizeof(int32_t); |  | 
| 591         bufferSizeNeeded += defaultReorderCodesSize; |  | 
| 592     } |  | 
| 593     if (coll->reorderCodes) { |  | 
| 594         reorderCodesSize = coll->reorderCodesLength * sizeof(int32_t); |  | 
| 595         bufferSizeNeeded += reorderCodesSize; |  | 
| 596     } |  | 
| 597     if (coll->leadBytePermutationTable) { |  | 
| 598         bufferSizeNeeded += 256 * sizeof(uint8_t); |  | 
| 599     } |  | 
| 600 |  | 
| 601     if (pBufferSize != NULL) { | 86     if (pBufferSize != NULL) { | 
| 602         int32_t inputSize = *pBufferSize; | 87         int32_t inputSize = *pBufferSize; | 
| 603         *pBufferSize = 1; | 88         *pBufferSize = 1; | 
| 604         if (inputSize == 0) { | 89         if (inputSize == 0) { | 
| 605             return NULL;  // preflighting for deprecated functionality | 90             return NULL;  // preflighting for deprecated functionality | 
| 606         } | 91         } | 
| 607     } | 92     } | 
| 608 | 93     Collator *newColl = Collator::fromUCollator(coll)->clone(); | 
| 609     char *stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded); | 94     if (newColl == NULL) { | 
| 610     // Null pointer check. |  | 
| 611     if (stackBufferChars == NULL) { |  | 
| 612         *status = U_MEMORY_ALLOCATION_ERROR; | 95         *status = U_MEMORY_ALLOCATION_ERROR; | 
| 613         return NULL; | 96     } else { | 
|  | 97         *status = U_SAFECLONE_ALLOCATED_WARNING; | 
| 614     } | 98     } | 
| 615     *status = U_SAFECLONE_ALLOCATED_WARNING; | 99     return newColl->toUCollator(); | 
| 616 |  | 
| 617     localCollator = (UCollator *)stackBufferChars; |  | 
| 618     rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding); |  | 
| 619     defaultReorderCodes = (int32_t*)((uint8_t*)rules + rulesSize); |  | 
| 620     reorderCodes = (int32_t*)((uint8_t*)defaultReorderCodes + defaultReorderCodesSize); |  | 
| 621     leadBytePermutationTable = (uint8_t*)reorderCodes + reorderCodesSize; |  | 
| 622 |  | 
| 623     { |  | 
| 624         UErrorCode tempStatus = U_ZERO_ERROR; |  | 
| 625         imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus); |  | 
| 626     } |  | 
| 627     if (coll->freeImageOnClose) { |  | 
| 628         image = (uint8_t *)uprv_malloc(imageSize); |  | 
| 629         // Null pointer check |  | 
| 630         if (image == NULL) { |  | 
| 631             *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 632             return NULL; |  | 
| 633         } |  | 
| 634         ucol_cloneBinary(coll, image, imageSize, status); |  | 
| 635         imageAllocated = TRUE; |  | 
| 636     } |  | 
| 637     else { |  | 
| 638         image = (uint8_t *)coll->image; |  | 
| 639     } |  | 
| 640     localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollat
      or, status); |  | 
| 641     if (U_FAILURE(*status)) { |  | 
| 642         return NULL; |  | 
| 643     } |  | 
| 644 |  | 
| 645     if (coll->rules) { |  | 
| 646         if (coll->freeRulesOnClose) { |  | 
| 647             localCollator->rules = u_strcpy(rules, coll->rules); |  | 
| 648             //bufferEnd += rulesSize; |  | 
| 649         } |  | 
| 650         else { |  | 
| 651             localCollator->rules = coll->rules; |  | 
| 652         } |  | 
| 653         localCollator->freeRulesOnClose = FALSE; |  | 
| 654         localCollator->rulesLength = coll->rulesLength; |  | 
| 655     } |  | 
| 656 |  | 
| 657     // collator reordering |  | 
| 658     if (coll->defaultReorderCodes) { |  | 
| 659         localCollator->defaultReorderCodes = |  | 
| 660             (int32_t*) uprv_memcpy(defaultReorderCodes, coll->defaultReorderCode
      s, coll->defaultReorderCodesLength * sizeof(int32_t)); |  | 
| 661         localCollator->defaultReorderCodesLength = coll->defaultReorderCodesLeng
      th; |  | 
| 662         localCollator->freeDefaultReorderCodesOnClose = FALSE; |  | 
| 663     } |  | 
| 664     if (coll->reorderCodes) { |  | 
| 665         localCollator->reorderCodes = |  | 
| 666             (int32_t*)uprv_memcpy(reorderCodes, coll->reorderCodes, coll->reorde
      rCodesLength * sizeof(int32_t)); |  | 
| 667         localCollator->reorderCodesLength = coll->reorderCodesLength; |  | 
| 668         localCollator->freeReorderCodesOnClose = FALSE; |  | 
| 669     } |  | 
| 670     if (coll->leadBytePermutationTable) { |  | 
| 671         localCollator->leadBytePermutationTable = |  | 
| 672             (uint8_t*) uprv_memcpy(leadBytePermutationTable, coll->leadBytePermu
      tationTable, 256); |  | 
| 673         localCollator->freeLeadBytePermutationTableOnClose = FALSE; |  | 
| 674     } |  | 
| 675 |  | 
| 676     int32_t i; |  | 
| 677     for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { |  | 
| 678         ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(col
      l, (UColAttribute)i, status), status); |  | 
| 679     } |  | 
| 680     // zero copies of pointers |  | 
| 681     localCollator->actualLocale = NULL; |  | 
| 682     localCollator->validLocale = NULL; |  | 
| 683     localCollator->requestedLocale = NULL; |  | 
| 684     localCollator->ucaRules = coll->ucaRules; // There should only be one copy h
      ere. |  | 
| 685     localCollator->freeOnClose = TRUE; |  | 
| 686     localCollator->freeImageOnClose = imageAllocated; |  | 
| 687     return localCollator; |  | 
| 688 } | 100 } | 
| 689 | 101 | 
| 690 U_CAPI void U_EXPORT2 | 102 U_CAPI void U_EXPORT2 | 
| 691 ucol_close(UCollator *coll) | 103 ucol_close(UCollator *coll) | 
| 692 { | 104 { | 
| 693     UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); | 105     UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); | 
| 694     UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); | 106     UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); | 
| 695     if(coll != NULL) { | 107     if(coll != NULL) { | 
| 696         // these are always owned by each UCollator struct, | 108         delete Collator::fromUCollator(coll); | 
| 697         // so we always free them |  | 
| 698         if(coll->validLocale != NULL) { |  | 
| 699             uprv_free(coll->validLocale); |  | 
| 700         } |  | 
| 701         if(coll->actualLocale != NULL) { |  | 
| 702             uprv_free(coll->actualLocale); |  | 
| 703         } |  | 
| 704         if(coll->requestedLocale != NULL) { |  | 
| 705             uprv_free(coll->requestedLocale); |  | 
| 706         } |  | 
| 707         if(coll->latinOneCEs != NULL) { |  | 
| 708             uprv_free(coll->latinOneCEs); |  | 
| 709         } |  | 
| 710         if(coll->options != NULL && coll->freeOptionsOnClose) { |  | 
| 711             uprv_free(coll->options); |  | 
| 712         } |  | 
| 713         if(coll->rules != NULL && coll->freeRulesOnClose) { |  | 
| 714             uprv_free((UChar *)coll->rules); |  | 
| 715         } |  | 
| 716         if(coll->image != NULL && coll->freeImageOnClose) { |  | 
| 717             uprv_free((UCATableHeader *)coll->image); |  | 
| 718         } |  | 
| 719 |  | 
| 720         if(coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutati
      onTableOnClose == TRUE) { |  | 
| 721             uprv_free(coll->leadBytePermutationTable); |  | 
| 722         } |  | 
| 723         if(coll->defaultReorderCodes != NULL && coll->freeDefaultReorderCodesOnC
      lose == TRUE) { |  | 
| 724             uprv_free(coll->defaultReorderCodes); |  | 
| 725         } |  | 
| 726         if(coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) 
      { |  | 
| 727             uprv_free(coll->reorderCodes); |  | 
| 728         } |  | 
| 729 |  | 
| 730         if(coll->delegate != NULL) { |  | 
| 731           delete (Collator*)coll->delegate; |  | 
| 732         } |  | 
| 733 |  | 
| 734         /* Here, it would be advisable to close: */ |  | 
| 735         /* - UData for UCA (unless we stuff it in the root resb */ |  | 
| 736         /* Again, do we need additional housekeeping... HMMM! */ |  | 
| 737         UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose); |  | 
| 738         if(coll->freeOnClose){ |  | 
| 739             /* for safeClone, if freeOnClose is FALSE, |  | 
| 740             don't free the other instance data */ |  | 
| 741             uprv_free(coll); |  | 
| 742         } |  | 
| 743     } | 109     } | 
| 744     UTRACE_EXIT(); | 110     UTRACE_EXIT(); | 
| 745 } | 111 } | 
| 746 | 112 | 
| 747 void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCo
      de *status) { |  | 
| 748     if(U_FAILURE(*status)) { |  | 
| 749         return; |  | 
| 750     } |  | 
| 751     result->caseFirst = (UColAttributeValue)opts->caseFirst; |  | 
| 752     result->caseLevel = (UColAttributeValue)opts->caseLevel; |  | 
| 753     result->frenchCollation = (UColAttributeValue)opts->frenchCollation; |  | 
| 754     result->normalizationMode = (UColAttributeValue)opts->normalizationMode; |  | 
| 755     if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) { |  | 
| 756         return; |  | 
| 757     } |  | 
| 758     result->strength = (UColAttributeValue)opts->strength; |  | 
| 759     result->variableTopValue = opts->variableTopValue; |  | 
| 760     result->alternateHandling = (UColAttributeValue)opts->alternateHandling; |  | 
| 761     result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ; |  | 
| 762     result->numericCollation = (UColAttributeValue)opts->numericCollation; |  | 
| 763     result->caseFirstisDefault = TRUE; |  | 
| 764     result->caseLevelisDefault = TRUE; |  | 
| 765     result->frenchCollationisDefault = TRUE; |  | 
| 766     result->normalizationModeisDefault = TRUE; |  | 
| 767     result->strengthisDefault = TRUE; |  | 
| 768     result->variableTopValueisDefault = TRUE; |  | 
| 769     result->alternateHandlingisDefault = TRUE; |  | 
| 770     result->hiraganaQisDefault = TRUE; |  | 
| 771     result->numericCollationisDefault = TRUE; |  | 
| 772 |  | 
| 773     ucol_updateInternalState(result, status); |  | 
| 774 |  | 
| 775     result->options = opts; |  | 
| 776 } |  | 
| 777 |  | 
| 778 |  | 
| 779 /** |  | 
| 780 * Approximate determination if a character is at a contraction end. |  | 
| 781 * Guaranteed to be TRUE if a character is at the end of a contraction, |  | 
| 782 * otherwise it is not deterministic. |  | 
| 783 * @param c character to be determined |  | 
| 784 * @param coll collator |  | 
| 785 */ |  | 
| 786 static |  | 
| 787 inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) { |  | 
| 788     if (c < coll->minContrEndCP) { |  | 
| 789         return FALSE; |  | 
| 790     } |  | 
| 791 |  | 
| 792     int32_t  hash = c; |  | 
| 793     uint8_t  htbyte; |  | 
| 794     if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { |  | 
| 795         if (U16_IS_TRAIL(c)) { |  | 
| 796             return TRUE; |  | 
| 797         } |  | 
| 798         hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; |  | 
| 799     } |  | 
| 800     htbyte = coll->contrEndCP[hash>>3]; |  | 
| 801     return (((htbyte >> (hash & 7)) & 1) == 1); |  | 
| 802 } |  | 
| 803 |  | 
| 804 |  | 
| 805 |  | 
| 806 /* |  | 
| 807 *   i_getCombiningClass() |  | 
| 808 *        A fast, at least partly inline version of u_getCombiningClass() |  | 
| 809 *        This is a candidate for further optimization.  Used heavily |  | 
| 810 *        in contraction processing. |  | 
| 811 */ |  | 
| 812 static |  | 
| 813 inline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) { |  | 
| 814     uint8_t sCC = 0; |  | 
| 815     if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) { |  | 
| 816         sCC = u_getCombiningClass(c); |  | 
| 817     } |  | 
| 818     return sCC; |  | 
| 819 } |  | 
| 820 |  | 
| 821 UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, con
      st UCollator *UCA, UErrorCode *status) { |  | 
| 822     UChar c; |  | 
| 823     UCollator *result = fillIn; |  | 
| 824     if(U_FAILURE(*status) || image == NULL) { |  | 
| 825         return NULL; |  | 
| 826     } |  | 
| 827 |  | 
| 828     if(result == NULL) { |  | 
| 829         result = (UCollator *)uprv_malloc(sizeof(UCollator)); |  | 
| 830         if(result == NULL) { |  | 
| 831             *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 832             return result; |  | 
| 833         } |  | 
| 834         result->freeOnClose = TRUE; |  | 
| 835     } else { |  | 
| 836         result->freeOnClose = FALSE; |  | 
| 837     } |  | 
| 838 |  | 
| 839     result->delegate = NULL; |  | 
| 840 |  | 
| 841     result->image = image; |  | 
| 842     result->mapping.getFoldingOffset = _getFoldingOffset; |  | 
| 843     const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosit
      ion; |  | 
| 844     utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE -
       result->image->mappingPosition, status); |  | 
| 845     if(U_FAILURE(*status)) { |  | 
| 846         if(result->freeOnClose == TRUE) { |  | 
| 847             uprv_free(result); |  | 
| 848             result = NULL; |  | 
| 849         } |  | 
| 850         return result; |  | 
| 851     } |  | 
| 852 |  | 
| 853     result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping); |  | 
| 854     result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->
      contractionCEs); |  | 
| 855     result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->c
      ontractionIndex); |  | 
| 856     result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expan
      sion); |  | 
| 857     result->rules = NULL; |  | 
| 858     result->rulesLength = 0; |  | 
| 859     result->freeRulesOnClose = FALSE; |  | 
| 860     result->defaultReorderCodes = NULL; |  | 
| 861     result->defaultReorderCodesLength = 0; |  | 
| 862     result->freeDefaultReorderCodesOnClose = FALSE; |  | 
| 863     result->reorderCodes = NULL; |  | 
| 864     result->reorderCodesLength = 0; |  | 
| 865     result->freeReorderCodesOnClose = FALSE; |  | 
| 866     result->leadBytePermutationTable = NULL; |  | 
| 867     result->freeLeadBytePermutationTableOnClose = FALSE; |  | 
| 868 |  | 
| 869     /* get the version info from UCATableHeader and populate the Collator struct
      */ |  | 
| 870     result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/ |  | 
| 871     result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules v
      ersion*/ |  | 
| 872     result->dataVersion[2] = 0; |  | 
| 873     result->dataVersion[3] = 0; |  | 
| 874 |  | 
| 875     result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP; |  | 
| 876     result->minUnsafeCP = 0; |  | 
| 877     for (c=0; c<0x300; c++) {  // Find the smallest unsafe char. |  | 
| 878         if (ucol_unsafeCP(c, result)) break; |  | 
| 879     } |  | 
| 880     result->minUnsafeCP = c; |  | 
| 881 |  | 
| 882     result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP; |  | 
| 883     result->minContrEndCP = 0; |  | 
| 884     for (c=0; c<0x300; c++) {  // Find the Contraction-ending char. |  | 
| 885         if (ucol_contractionEndCP(c, result)) break; |  | 
| 886     } |  | 
| 887     result->minContrEndCP = c; |  | 
| 888 |  | 
| 889     /* max expansion tables */ |  | 
| 890     result->endExpansionCE = (uint32_t*)((uint8_t*)result->image + |  | 
| 891                                          result->image->endExpansionCE); |  | 
| 892     result->lastEndExpansionCE = result->endExpansionCE + |  | 
| 893                                  result->image->endExpansionCECount - 1; |  | 
| 894     result->expansionCESize = (uint8_t*)result->image + |  | 
| 895                                                result->image->expansionCESize; |  | 
| 896 |  | 
| 897 |  | 
| 898     //result->errorCode = *status; |  | 
| 899 |  | 
| 900     result->latinOneCEs = NULL; |  | 
| 901 |  | 
| 902     result->latinOneRegenTable = FALSE; |  | 
| 903     result->latinOneFailed = FALSE; |  | 
| 904     result->UCA = UCA; |  | 
| 905 |  | 
| 906     /* Normally these will be set correctly later. This is the default if you us
      e UCA or the default. */ |  | 
| 907     result->ucaRules = NULL; |  | 
| 908     result->actualLocale = NULL; |  | 
| 909     result->validLocale = NULL; |  | 
| 910     result->requestedLocale = NULL; |  | 
| 911     result->hasRealData = FALSE; // real data lives in .dat file... |  | 
| 912     result->freeImageOnClose = FALSE; |  | 
| 913 |  | 
| 914     /* set attributes */ |  | 
| 915     ucol_setOptionsFromHeader( |  | 
| 916         result, |  | 
| 917         (UColOptionSet*)((uint8_t*)result->image+result->image->options), |  | 
| 918         status); |  | 
| 919     result->freeOptionsOnClose = FALSE; |  | 
| 920 |  | 
| 921     return result; |  | 
| 922 } |  | 
| 923 |  | 
| 924 /* new Mark's code */ |  | 
| 925 |  | 
| 926 /** |  | 
| 927  * For generation of Implicit CEs |  | 
| 928  * @author Davis |  | 
| 929  * |  | 
| 930  * Cleaned up so that changes can be made more easily. |  | 
| 931  * Old values: |  | 
| 932 # First Implicit: E26A792D |  | 
| 933 # Last Implicit: E3DC70C0 |  | 
| 934 # First CJK: E0030300 |  | 
| 935 # Last CJK: E0A9DD00 |  | 
| 936 # First CJK_A: E0A9DF00 |  | 
| 937 # Last CJK_A: E0DE3100 |  | 
| 938  */ |  | 
| 939 /* Following is a port of Mark's code for new treatment of implicits. |  | 
| 940  * It is positioned here, since ucol_initUCA need to initialize the |  | 
| 941  * variables below according to the data in the fractional UCA. |  | 
| 942  */ |  | 
| 943 |  | 
| 944 /** |  | 
| 945  * Function used to: |  | 
| 946  * a) collapse the 2 different Han ranges from UCA into one (in the right order)
      , and |  | 
| 947  * b) bump any non-CJK characters by 10FFFF. |  | 
| 948  * The relevant blocks are: |  | 
| 949  * A:    4E00..9FFF; CJK Unified Ideographs |  | 
| 950  *       F900..FAFF; CJK Compatibility Ideographs |  | 
| 951  * B:    3400..4DBF; CJK Unified Ideographs Extension A |  | 
| 952  *       20000..XX;  CJK Unified Ideographs Extension B (and others later on) |  | 
| 953  * As long as |  | 
| 954  *   no new B characters are allocated between 4E00 and FAFF, and |  | 
| 955  *   no new A characters are outside of this range, |  | 
| 956  * (very high probability) this simple code will work. |  | 
| 957  * The reordered blocks are: |  | 
| 958  * Block1 is CJK |  | 
| 959  * Block2 is CJK_COMPAT_USED |  | 
| 960  * Block3 is CJK_A |  | 
| 961  * (all contiguous) |  | 
| 962  * Any other CJK gets its normal code point |  | 
| 963  * Any non-CJK gets +10FFFF |  | 
| 964  * When we reorder Block1, we make sure that it is at the very start, |  | 
| 965  * so that it will use a 3-byte form. |  | 
| 966  * Warning: we only pick up the compatibility characters that are |  | 
| 967  * NOT decomposed, so that block is smaller! |  | 
| 968  */ |  | 
| 969 |  | 
| 970 // CONSTANTS |  | 
| 971 static const UChar32 |  | 
| 972     NON_CJK_OFFSET = 0x110000, |  | 
| 973     UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2 |  | 
| 974 |  | 
| 975 /** |  | 
| 976  * Precomputed by initImplicitConstants() |  | 
| 977  */ |  | 
| 978 static int32_t |  | 
| 979     final3Multiplier = 0, |  | 
| 980     final4Multiplier = 0, |  | 
| 981     final3Count = 0, |  | 
| 982     final4Count = 0, |  | 
| 983     medialCount = 0, |  | 
| 984     min3Primary = 0, |  | 
| 985     min4Primary = 0, |  | 
| 986     max4Primary = 0, |  | 
| 987     minTrail = 0, |  | 
| 988     maxTrail = 0, |  | 
| 989     max3Trail = 0, |  | 
| 990     max4Trail = 0, |  | 
| 991     min4Boundary = 0; |  | 
| 992 |  | 
| 993 static const UChar32 |  | 
| 994     // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; |  | 
| 995     // 9FCC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;  (Unicode 6.1) |  | 
| 996     CJK_BASE = 0x4E00, |  | 
| 997     CJK_LIMIT = 0x9FCC+1, |  | 
| 998     // Unified CJK ideographs in the compatibility ideographs block. |  | 
| 999     CJK_COMPAT_USED_BASE = 0xFA0E, |  | 
| 1000     CJK_COMPAT_USED_LIMIT = 0xFA2F+1, |  | 
| 1001     // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;; |  | 
| 1002     // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; |  | 
| 1003     CJK_A_BASE = 0x3400, |  | 
| 1004     CJK_A_LIMIT = 0x4DB5+1, |  | 
| 1005     // 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;; |  | 
| 1006     // 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;; |  | 
| 1007     CJK_B_BASE = 0x20000, |  | 
| 1008     CJK_B_LIMIT = 0x2A6D6+1, |  | 
| 1009     // 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;; |  | 
| 1010     // 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;; |  | 
| 1011     CJK_C_BASE = 0x2A700, |  | 
| 1012     CJK_C_LIMIT = 0x2B734+1, |  | 
| 1013     // 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;; |  | 
| 1014     // 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;; |  | 
| 1015     CJK_D_BASE = 0x2B740, |  | 
| 1016     CJK_D_LIMIT = 0x2B81D+1; |  | 
| 1017     // when adding to this list, look for all occurrences (in project) |  | 
| 1018     // of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing
      !!!! |  | 
| 1019 |  | 
| 1020 static UChar32 swapCJK(UChar32 i) { |  | 
| 1021     if (i < CJK_A_BASE) { |  | 
| 1022         // non-CJK |  | 
| 1023     } else if (i < CJK_A_LIMIT) { |  | 
| 1024         // Extension A has lower code points than the original Unihan+compat |  | 
| 1025         // but sorts higher. |  | 
| 1026         return i - CJK_A_BASE |  | 
| 1027                 + (CJK_LIMIT - CJK_BASE) |  | 
| 1028                 + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE); |  | 
| 1029     } else if (i < CJK_BASE) { |  | 
| 1030         // non-CJK |  | 
| 1031     } else if (i < CJK_LIMIT) { |  | 
| 1032         return i - CJK_BASE; |  | 
| 1033     } else if (i < CJK_COMPAT_USED_BASE) { |  | 
| 1034         // non-CJK |  | 
| 1035     } else if (i < CJK_COMPAT_USED_LIMIT) { |  | 
| 1036         return i - CJK_COMPAT_USED_BASE |  | 
| 1037                 + (CJK_LIMIT - CJK_BASE); |  | 
| 1038     } else if (i < CJK_B_BASE) { |  | 
| 1039         // non-CJK |  | 
| 1040     } else if (i < CJK_B_LIMIT) { |  | 
| 1041         return i; // non-BMP-CJK |  | 
| 1042     } else if (i < CJK_C_BASE) { |  | 
| 1043         // non-CJK |  | 
| 1044     } else if (i < CJK_C_LIMIT) { |  | 
| 1045         return i; // non-BMP-CJK |  | 
| 1046     } else if (i < CJK_D_BASE) { |  | 
| 1047         // non-CJK |  | 
| 1048     } else if (i < CJK_D_LIMIT) { |  | 
| 1049         return i; // non-BMP-CJK |  | 
| 1050     } |  | 
| 1051     return i + NON_CJK_OFFSET; // non-CJK |  | 
| 1052 } |  | 
| 1053 |  | 
| 1054 U_CAPI UChar32 U_EXPORT2 |  | 
| 1055 uprv_uca_getRawFromCodePoint(UChar32 i) { |  | 
| 1056     return swapCJK(i)+1; |  | 
| 1057 } |  | 
| 1058 |  | 
| 1059 U_CAPI UChar32 U_EXPORT2 |  | 
| 1060 uprv_uca_getCodePointFromRaw(UChar32 i) { |  | 
| 1061     i--; |  | 
| 1062     UChar32 result = 0; |  | 
| 1063     if(i >= NON_CJK_OFFSET) { |  | 
| 1064         result = i - NON_CJK_OFFSET; |  | 
| 1065     } else if(i >= CJK_B_BASE) { |  | 
| 1066         result = i; |  | 
| 1067     } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT 
      - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted |  | 
| 1068         if(i < CJK_LIMIT - CJK_BASE) { |  | 
| 1069             result = i + CJK_BASE; |  | 
| 1070         } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMP
      AT_USED_BASE)) { |  | 
| 1071             result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE); |  | 
| 1072         } else { |  | 
| 1073             result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_
      LIMIT - CJK_COMPAT_USED_BASE); |  | 
| 1074         } |  | 
| 1075     } else { |  | 
| 1076         result = -1; |  | 
| 1077     } |  | 
| 1078     return result; |  | 
| 1079 } |  | 
| 1080 |  | 
| 1081 // GET IMPLICIT PRIMARY WEIGHTS |  | 
| 1082 // Return value is left justified primary key |  | 
| 1083 U_CAPI uint32_t U_EXPORT2 |  | 
| 1084 uprv_uca_getImplicitFromRaw(UChar32 cp) { |  | 
| 1085     /* |  | 
| 1086     if (cp < 0 || cp > UCOL_MAX_INPUT) { |  | 
| 1087         throw new IllegalArgumentException("Code point out of range " + Utility.
      hex(cp)); |  | 
| 1088     } |  | 
| 1089     */ |  | 
| 1090     int32_t last0 = cp - min4Boundary; |  | 
| 1091     if (last0 < 0) { |  | 
| 1092         int32_t last1 = cp / final3Count; |  | 
| 1093         last0 = cp % final3Count; |  | 
| 1094 |  | 
| 1095         int32_t last2 = last1 / medialCount; |  | 
| 1096         last1 %= medialCount; |  | 
| 1097 |  | 
| 1098         last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at
       start |  | 
| 1099         last1 = minTrail + last1; // offset |  | 
| 1100         last2 = min3Primary + last2; // offset |  | 
| 1101         /* |  | 
| 1102         if (last2 >= min4Primary) { |  | 
| 1103             throw new IllegalArgumentException("4-byte out of range: " + Utility
      .hex(cp) + ", " + Utility.hex(last2)); |  | 
| 1104         } |  | 
| 1105         */ |  | 
| 1106         return (last2 << 24) + (last1 << 16) + (last0 << 8); |  | 
| 1107     } else { |  | 
| 1108         int32_t last1 = last0 / final4Count; |  | 
| 1109         last0 %= final4Count; |  | 
| 1110 |  | 
| 1111         int32_t last2 = last1 / medialCount; |  | 
| 1112         last1 %= medialCount; |  | 
| 1113 |  | 
| 1114         int32_t last3 = last2 / medialCount; |  | 
| 1115         last2 %= medialCount; |  | 
| 1116 |  | 
| 1117         last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at
       start |  | 
| 1118         last1 = minTrail + last1; // offset |  | 
| 1119         last2 = minTrail + last2; // offset |  | 
| 1120         last3 = min4Primary + last3; // offset |  | 
| 1121         /* |  | 
| 1122         if (last3 > max4Primary) { |  | 
| 1123             throw new IllegalArgumentException("4-byte out of range: " + Utility
      .hex(cp) + ", " + Utility.hex(last3)); |  | 
| 1124         } |  | 
| 1125         */ |  | 
| 1126         return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0; |  | 
| 1127     } |  | 
| 1128 } |  | 
| 1129 |  | 
| 1130 static uint32_t U_EXPORT2 |  | 
| 1131 uprv_uca_getImplicitPrimary(UChar32 cp) { |  | 
| 1132    //fprintf(stdout, "Incoming: %04x\n", cp); |  | 
| 1133     //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp)); |  | 
| 1134 |  | 
| 1135     cp = swapCJK(cp); |  | 
| 1136     cp++; |  | 
| 1137     // we now have a range of numbers from 0 to 21FFFF. |  | 
| 1138 |  | 
| 1139     //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp)); |  | 
| 1140     //fprintf(stdout, "CJK swapped: %04x\n", cp); |  | 
| 1141 |  | 
| 1142     return uprv_uca_getImplicitFromRaw(cp); |  | 
| 1143 } |  | 
| 1144 |  | 
| 1145 /** |  | 
| 1146  * Converts implicit CE into raw integer ("code point") |  | 
| 1147  * @param implicit |  | 
| 1148  * @return -1 if illegal format |  | 
| 1149  */ |  | 
| 1150 U_CAPI UChar32 U_EXPORT2 |  | 
| 1151 uprv_uca_getRawFromImplicit(uint32_t implicit) { |  | 
| 1152     UChar32 result; |  | 
| 1153     UChar32 b3 = implicit & 0xFF; |  | 
| 1154     UChar32 b2 = (implicit >> 8) & 0xFF; |  | 
| 1155     UChar32 b1 = (implicit >> 16) & 0xFF; |  | 
| 1156     UChar32 b0 = (implicit >> 24) & 0xFF; |  | 
| 1157 |  | 
| 1158     // simple parameter checks |  | 
| 1159     if (b0 < min3Primary || b0 > max4Primary |  | 
| 1160         || b1 < minTrail || b1 > maxTrail) |  | 
| 1161         return -1; |  | 
| 1162     // normal offsets |  | 
| 1163     b1 -= minTrail; |  | 
| 1164 |  | 
| 1165     // take care of the final values, and compose |  | 
| 1166     if (b0 < min4Primary) { |  | 
| 1167         if (b2 < minTrail || b2 > max3Trail || b3 != 0) |  | 
| 1168             return -1; |  | 
| 1169         b2 -= minTrail; |  | 
| 1170         UChar32 remainder = b2 % final3Multiplier; |  | 
| 1171         if (remainder != 0) |  | 
| 1172             return -1; |  | 
| 1173         b0 -= min3Primary; |  | 
| 1174         b2 /= final3Multiplier; |  | 
| 1175         result = ((b0 * medialCount) + b1) * final3Count + b2; |  | 
| 1176     } else { |  | 
| 1177         if (b2 < minTrail || b2 > maxTrail |  | 
| 1178             || b3 < minTrail || b3 > max4Trail) |  | 
| 1179             return -1; |  | 
| 1180         b2 -= minTrail; |  | 
| 1181         b3 -= minTrail; |  | 
| 1182         UChar32 remainder = b3 % final4Multiplier; |  | 
| 1183         if (remainder != 0) |  | 
| 1184             return -1; |  | 
| 1185         b3 /= final4Multiplier; |  | 
| 1186         b0 -= min4Primary; |  | 
| 1187         result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + 
      b3 + min4Boundary; |  | 
| 1188     } |  | 
| 1189     // final check |  | 
| 1190     if (result < 0 || result > UCOL_MAX_INPUT) |  | 
| 1191         return -1; |  | 
| 1192     return result; |  | 
| 1193 } |  | 
| 1194 |  | 
| 1195 |  | 
| 1196 static inline int32_t divideAndRoundUp(int a, int b) { |  | 
| 1197     return 1 + (a-1)/b; |  | 
| 1198 } |  | 
| 1199 |  | 
| 1200 /* this function is either called from initUCA or from genUCA before |  | 
| 1201  * doing canonical closure for the UCA. |  | 
| 1202  */ |  | 
| 1203 |  | 
| 1204 /** |  | 
| 1205  * Set up to generate implicits. |  | 
| 1206  * Maintenance Note:  this function may end up being called more than once, due |  | 
| 1207  *                    to threading races during initialization.  Make sure that |  | 
| 1208  *                    none of the Constants is ever transiently assigned an |  | 
| 1209  *                    incorrect value. |  | 
| 1210  * @param minPrimary |  | 
| 1211  * @param maxPrimary |  | 
| 1212  * @param minTrail final byte |  | 
| 1213  * @param maxTrail final byte |  | 
| 1214  * @param gap3 the gap we leave for tailoring for 3-byte forms |  | 
| 1215  * @param primaries3count number of lead bytes used for 3-byte forms |  | 
| 1216  */ |  | 
| 1217 static void initImplicitConstants(int minPrimary, int maxPrimary, |  | 
| 1218                                     int minTrailIn, int maxTrailIn, |  | 
| 1219                                     int gap3, int primaries3count, |  | 
| 1220                                     UErrorCode *status) { |  | 
| 1221     // some simple parameter checks |  | 
| 1222     if ((minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) |  | 
| 1223         || (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF) |  | 
| 1224         || (primaries3count < 1)) |  | 
| 1225     { |  | 
| 1226         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 1227         return; |  | 
| 1228     }; |  | 
| 1229 |  | 
| 1230     minTrail = minTrailIn; |  | 
| 1231     maxTrail = maxTrailIn; |  | 
| 1232 |  | 
| 1233     min3Primary = minPrimary; |  | 
| 1234     max4Primary = maxPrimary; |  | 
| 1235     // compute constants for use later. |  | 
| 1236     // number of values we can use in trailing bytes |  | 
| 1237     // leave room for empty values between AND above, e.g. if gap = 2 |  | 
| 1238     // range 3..7 => +3 -4 -5 -6 -7: so 1 value |  | 
| 1239     // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values |  | 
| 1240     // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values |  | 
| 1241     final3Multiplier = gap3 + 1; |  | 
| 1242     final3Count = (maxTrail - minTrail + 1) / final3Multiplier; |  | 
| 1243     max3Trail = minTrail + (final3Count - 1) * final3Multiplier; |  | 
| 1244 |  | 
| 1245     // medials can use full range |  | 
| 1246     medialCount = (maxTrail - minTrail + 1); |  | 
| 1247     // find out how many values fit in each form |  | 
| 1248     int32_t threeByteCount = medialCount * final3Count; |  | 
| 1249     // now determine where the 3/4 boundary is. |  | 
| 1250     // we use 3 bytes below the boundary, and 4 above |  | 
| 1251     int32_t primariesAvailable = maxPrimary - minPrimary + 1; |  | 
| 1252     int32_t primaries4count = primariesAvailable - primaries3count; |  | 
| 1253 |  | 
| 1254 |  | 
| 1255     int32_t min3ByteCoverage = primaries3count * threeByteCount; |  | 
| 1256     min4Primary = minPrimary + primaries3count; |  | 
| 1257     min4Boundary = min3ByteCoverage; |  | 
| 1258     // Now expand out the multiplier for the 4 bytes, and redo. |  | 
| 1259 |  | 
| 1260     int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary; |  | 
| 1261     int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count
      ); |  | 
| 1262     int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCo
      unt * medialCount); |  | 
| 1263     int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte; |  | 
| 1264     if (gap4 < 1) { |  | 
| 1265         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 1266         return; |  | 
| 1267     } |  | 
| 1268     final4Multiplier = gap4 + 1; |  | 
| 1269     final4Count = neededPerFinalByte; |  | 
| 1270     max4Trail = minTrail + (final4Count - 1) * final4Multiplier; |  | 
| 1271 } |  | 
| 1272 |  | 
| 1273     /** |  | 
| 1274      * Supply parameters for generating implicit CEs |  | 
| 1275      */ |  | 
| 1276 U_CAPI void U_EXPORT2 |  | 
| 1277 uprv_uca_initImplicitConstants(UErrorCode *status) { |  | 
| 1278     // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms
      . |  | 
| 1279     //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status); |  | 
| 1280     initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1,
       1, status); |  | 
| 1281 } |  | 
| 1282 |  | 
| 1283 |  | 
| 1284 /*    collIterNormalize     Incremental Normalization happens here.             
                */ |  | 
| 1285 /*                          pick up the range of chars identified by FCD,       
                */ |  | 
| 1286 /*                          normalize it into the collIterate's writable buffer,
                */ |  | 
| 1287 /*                          switch the collIterate's state to use the writable b
      uffer.    */ |  | 
| 1288 /*                                                                              
                */ |  | 
| 1289 static |  | 
| 1290 void collIterNormalize(collIterate *collationSource) |  | 
| 1291 { |  | 
| 1292     UErrorCode  status = U_ZERO_ERROR; |  | 
| 1293     const UChar *srcP = collationSource->pos - 1;      /*  Start of chars to nor
      malize    */ |  | 
| 1294     const UChar *endP = collationSource->fcdPosition;  /* End of region to norma
      lize+1    */ |  | 
| 1295 |  | 
| 1296     collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - 
      srcP)), |  | 
| 1297                                     collationSource->writableBuffer, |  | 
| 1298                                     status); |  | 
| 1299     if (U_FAILURE(status)) { |  | 
| 1300 #ifdef UCOL_DEBUG |  | 
| 1301         fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_erro
      rName(status)); |  | 
| 1302 #endif |  | 
| 1303         return; |  | 
| 1304     } |  | 
| 1305 |  | 
| 1306     collationSource->pos        = collationSource->writableBuffer.getTerminatedB
      uffer(); |  | 
| 1307     collationSource->origFlags  = collationSource->flags; |  | 
| 1308     collationSource->flags     |= UCOL_ITER_INNORMBUF; |  | 
| 1309     collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE
      _ITERATOR); |  | 
| 1310 } |  | 
| 1311 |  | 
| 1312 |  | 
| 1313 // This function takes the iterator and extracts normalized stuff up to the next
       boundary |  | 
| 1314 // It is similar in the end results to the collIterNormalize, but for the cases 
      when we |  | 
| 1315 // use an iterator |  | 
| 1316 /*static |  | 
| 1317 inline void normalizeIterator(collIterate *collationSource) { |  | 
| 1318   UErrorCode status = U_ZERO_ERROR; |  | 
| 1319   UBool wasNormalized = FALSE; |  | 
| 1320   //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->ite
      rator, UITER_CURRENT); |  | 
| 1321   uint32_t iterIndex = collationSource->iterator->getState(collationSource->iter
      ator); |  | 
| 1322   int32_t normLen = unorm_next(collationSource->iterator, collationSource->writa
      bleBuffer, |  | 
| 1323     (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalize
      d, &status); |  | 
| 1324   if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->w
      ritableBufSize) { |  | 
| 1325     // reallocate and terminate |  | 
| 1326     if(!u_growBufferFromStatic(collationSource->stackWritableBuffer, |  | 
| 1327                                &collationSource->writableBuffer, |  | 
| 1328                                (int32_t *)&collationSource->writableBufSize, nor
      mLen + 1, |  | 
| 1329                                0) |  | 
| 1330     ) { |  | 
| 1331     #ifdef UCOL_DEBUG |  | 
| 1332         fprintf(stderr, "normalizeIterator(), out of memory\n"); |  | 
| 1333     #endif |  | 
| 1334         return; |  | 
| 1335     } |  | 
| 1336     status = U_ZERO_ERROR; |  | 
| 1337     //collationSource->iterator->move(collationSource->iterator, iterIndex, UITE
      R_ZERO); |  | 
| 1338     collationSource->iterator->setState(collationSource->iterator, iterIndex, &s
      tatus); |  | 
| 1339     normLen = unorm_next(collationSource->iterator, collationSource->writableBuf
      fer, |  | 
| 1340     (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalize
      d, &status); |  | 
| 1341   } |  | 
| 1342   // Terminate the buffer - we already checked that it is big enough |  | 
| 1343   collationSource->writableBuffer[normLen] = 0; |  | 
| 1344   if(collationSource->writableBuffer != collationSource->stackWritableBuffer) { |  | 
| 1345       collationSource->flags |= UCOL_ITER_ALLOCATED; |  | 
| 1346   } |  | 
| 1347   collationSource->pos        = collationSource->writableBuffer; |  | 
| 1348   collationSource->origFlags  = collationSource->flags; |  | 
| 1349   collationSource->flags     |= UCOL_ITER_INNORMBUF; |  | 
| 1350   collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_I
      TERATOR); |  | 
| 1351 }*/ |  | 
| 1352 |  | 
| 1353 |  | 
| 1354 /* Incremental FCD check and normalize                                          
                */ |  | 
| 1355 /*   Called from getNextCE when normalization state is suspect.                 
                */ |  | 
| 1356 /*   When entering, the state is known to be this:                              
                */ |  | 
| 1357 /*      o   We are working in the main buffer of the collIterate, not the side  
                */ |  | 
| 1358 /*          writable buffer.  When in the side buffer, normalization mode is alw
      ays off,  */ |  | 
| 1359 /*          so we won't get here.                                               
                */ |  | 
| 1360 /*      o   The leading combining class from the current character is 0 or      
                */ |  | 
| 1361 /*          the trailing combining class of the previous char was zero.         
                */ |  | 
| 1362 /*          True because the previous call to this function will have always exi
      ted       */ |  | 
| 1363 /*          that way, and we get called for every char where cc might be non-zer
      o.        */ |  | 
| 1364 static |  | 
| 1365 inline UBool collIterFCD(collIterate *collationSource) { |  | 
| 1366     const UChar *srcP, *endP; |  | 
| 1367     uint8_t     leadingCC; |  | 
| 1368     uint8_t     prevTrailingCC = 0; |  | 
| 1369     uint16_t    fcd; |  | 
| 1370     UBool       needNormalize = FALSE; |  | 
| 1371 |  | 
| 1372     srcP = collationSource->pos-1; |  | 
| 1373 |  | 
| 1374     if (collationSource->flags & UCOL_ITER_HASLEN) { |  | 
| 1375         endP = collationSource->endp; |  | 
| 1376     } else { |  | 
| 1377         endP = NULL; |  | 
| 1378     } |  | 
| 1379 |  | 
| 1380     // Get the trailing combining class of the current character. If it's zero, 
      we are OK. |  | 
| 1381     fcd = g_nfcImpl->nextFCD16(srcP, endP); |  | 
| 1382     if (fcd != 0) { |  | 
| 1383         prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); |  | 
| 1384 |  | 
| 1385         if (prevTrailingCC != 0) { |  | 
| 1386             // The current char has a non-zero trailing CC.  Scan forward until 
      we find |  | 
| 1387             //   a char with a leading cc of zero. |  | 
| 1388             while (endP == NULL || srcP != endP) |  | 
| 1389             { |  | 
| 1390                 const UChar *savedSrcP = srcP; |  | 
| 1391 |  | 
| 1392                 fcd = g_nfcImpl->nextFCD16(srcP, endP); |  | 
| 1393                 leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); |  | 
| 1394                 if (leadingCC == 0) { |  | 
| 1395                     srcP = savedSrcP;      // Hit char that is not part of combi
      ning sequence. |  | 
| 1396                                            //   back up over it.  (Could be surr
      ogate pair!) |  | 
| 1397                     break; |  | 
| 1398                 } |  | 
| 1399 |  | 
| 1400                 if (leadingCC < prevTrailingCC) { |  | 
| 1401                     needNormalize = TRUE; |  | 
| 1402                 } |  | 
| 1403 |  | 
| 1404                 prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); |  | 
| 1405             } |  | 
| 1406         } |  | 
| 1407     } |  | 
| 1408 |  | 
| 1409     collationSource->fcdPosition = (UChar *)srcP; |  | 
| 1410 |  | 
| 1411     return needNormalize; |  | 
| 1412 } |  | 
| 1413 |  | 
| 1414 /****************************************************************************/ |  | 
| 1415 /* Following are the CE retrieval functions                                 */ |  | 
| 1416 /*                                                                          */ |  | 
| 1417 /****************************************************************************/ |  | 
| 1418 |  | 
| 1419 static uint32_t getImplicit(UChar32 cp, collIterate *collationSource); |  | 
| 1420 static uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource); |  | 
| 1421 |  | 
| 1422 /* there should be a macro version of this function in the header file */ |  | 
| 1423 /* This is the first function that tries to fetch a collation element  */ |  | 
| 1424 /* If it's not succesfull or it encounters a more difficult situation  */ |  | 
| 1425 /* some more sofisticated and slower functions are invoked             */ |  | 
| 1426 static |  | 
| 1427 inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSou
      rce, UErrorCode *status) { |  | 
| 1428     uint32_t order = 0; |  | 
| 1429     if (collationSource->CEpos > collationSource->toReturn) {       /* Are there
       any CEs from previous expansions? */ |  | 
| 1430         order = *(collationSource->toReturn++);                         /* if so
      , return them */ |  | 
| 1431         if(collationSource->CEpos == collationSource->toReturn) { |  | 
| 1432             collationSource->CEpos = collationSource->toReturn = collationSource
      ->extendCEs ? collationSource->extendCEs : collationSource->CEs; |  | 
| 1433         } |  | 
| 1434         return order; |  | 
| 1435     } |  | 
| 1436 |  | 
| 1437     UChar ch = 0; |  | 
| 1438     collationSource->offsetReturn = NULL; |  | 
| 1439 |  | 
| 1440     do { |  | 
| 1441         for (;;)                           /* Loop handles case when incremental
       normalize switches   */ |  | 
| 1442         {                                  /*   to or from the side buffer / ori
      ginal string, and we  */ |  | 
| 1443             /*   need to start again to get the next character.        */ |  | 
| 1444 |  | 
| 1445             if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBU
      F | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0) |  | 
| 1446             { |  | 
| 1447                 // The source string is null terminated and we're not working fr
      om the side buffer, |  | 
| 1448                 //   and we're not normalizing.  This is the fast path. |  | 
| 1449                 //   (We can be in the side buffer for Thai pre-vowel reordering
       even when not normalizing.) |  | 
| 1450                 ch = *collationSource->pos++; |  | 
| 1451                 if (ch != 0) { |  | 
| 1452                     break; |  | 
| 1453                 } |  | 
| 1454                 else { |  | 
| 1455                     return UCOL_NO_MORE_CES; |  | 
| 1456                 } |  | 
| 1457             } |  | 
| 1458 |  | 
| 1459             if (collationSource->flags & UCOL_ITER_HASLEN) { |  | 
| 1460                 // Normal path for strings when length is specified. |  | 
| 1461                 //   (We can't be in side buffer because it is always null termi
      nated.) |  | 
| 1462                 if (collationSource->pos >= collationSource->endp) { |  | 
| 1463                     // Ran off of the end of the main source string.  We're done
      . |  | 
| 1464                     return UCOL_NO_MORE_CES; |  | 
| 1465                 } |  | 
| 1466                 ch = *collationSource->pos++; |  | 
| 1467             } |  | 
| 1468             else if(collationSource->flags & UCOL_USE_ITERATOR) { |  | 
| 1469                 UChar32 iterCh = collationSource->iterator->next(collationSource
      ->iterator); |  | 
| 1470                 if(iterCh == U_SENTINEL) { |  | 
| 1471                     return UCOL_NO_MORE_CES; |  | 
| 1472                 } |  | 
| 1473                 ch = (UChar)iterCh; |  | 
| 1474             } |  | 
| 1475             else |  | 
| 1476             { |  | 
| 1477                 // Null terminated string. |  | 
| 1478                 ch = *collationSource->pos++; |  | 
| 1479                 if (ch == 0) { |  | 
| 1480                     // Ran off end of buffer. |  | 
| 1481                     if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { |  | 
| 1482                         // Ran off end of main string. backing up one character. |  | 
| 1483                         collationSource->pos--; |  | 
| 1484                         return UCOL_NO_MORE_CES; |  | 
| 1485                     } |  | 
| 1486                     else |  | 
| 1487                     { |  | 
| 1488                         // Hit null in the normalize side buffer. |  | 
| 1489                         // Usually this means the end of the normalized data, |  | 
| 1490                         // except for one odd case: a null followed by combining
       chars, |  | 
| 1491                         //   which is the case if we are at the start of the buf
      fer. |  | 
| 1492                         if (collationSource->pos == collationSource->writableBuf
      fer.getBuffer()+1) { |  | 
| 1493                             break; |  | 
| 1494                         } |  | 
| 1495 |  | 
| 1496                         //  Null marked end of side buffer. |  | 
| 1497                         //   Revert to the main string and |  | 
| 1498                         //   loop back to top to try again to get a character. |  | 
| 1499                         collationSource->pos   = collationSource->fcdPosition; |  | 
| 1500                         collationSource->flags = collationSource->origFlags; |  | 
| 1501                         continue; |  | 
| 1502                     } |  | 
| 1503                 } |  | 
| 1504             } |  | 
| 1505 |  | 
| 1506             if(collationSource->flags&UCOL_HIRAGANA_Q) { |  | 
| 1507                 /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set 
      the flag |  | 
| 1508                  * based on whether the previous codepoint was Hiragana or Katak
      ana. |  | 
| 1509                  */ |  | 
| 1510                 if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)
      ) || |  | 
| 1511                         ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 
      0x3099 && ch <= 0x309C))) { |  | 
| 1512                     collationSource->flags |= UCOL_WAS_HIRAGANA; |  | 
| 1513                 } else { |  | 
| 1514                     collationSource->flags &= ~UCOL_WAS_HIRAGANA; |  | 
| 1515                 } |  | 
| 1516             } |  | 
| 1517 |  | 
| 1518             // We've got a character.  See if there's any fcd and/or normalizati
      on stuff to do. |  | 
| 1519             //    Note that UCOL_ITER_NORM flag is always zero when we are in th
      e side buffer. |  | 
| 1520             if ((collationSource->flags & UCOL_ITER_NORM) == 0) { |  | 
| 1521                 break; |  | 
| 1522             } |  | 
| 1523 |  | 
| 1524             if (collationSource->fcdPosition >= collationSource->pos) { |  | 
| 1525                 // An earlier FCD check has already covered the current characte
      r. |  | 
| 1526                 // We can go ahead and process this char. |  | 
| 1527                 break; |  | 
| 1528             } |  | 
| 1529 |  | 
| 1530             if (ch < ZERO_CC_LIMIT_ ) { |  | 
| 1531                 // Fast fcd safe path.  Trailing combining class == 0.  This cha
      r is OK. |  | 
| 1532                 break; |  | 
| 1533             } |  | 
| 1534 |  | 
| 1535             if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) { |  | 
| 1536                 // We need to peek at the next character in order to tell if we 
      are FCD |  | 
| 1537                 if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSour
      ce->pos >= collationSource->endp) { |  | 
| 1538                     // We are at the last char of source string. |  | 
| 1539                     //  It is always OK for FCD check. |  | 
| 1540                     break; |  | 
| 1541                 } |  | 
| 1542 |  | 
| 1543                 // Not at last char of source string (or we'll check against ter
      minating null).  Do the FCD fast test |  | 
| 1544                 if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) { |  | 
| 1545                     break; |  | 
| 1546                 } |  | 
| 1547             } |  | 
| 1548 |  | 
| 1549 |  | 
| 1550             // Need a more complete FCD check and possible normalization. |  | 
| 1551             if (collIterFCD(collationSource)) { |  | 
| 1552                 collIterNormalize(collationSource); |  | 
| 1553             } |  | 
| 1554             if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) { |  | 
| 1555                 //  No normalization was needed.  Go ahead and process the char 
      we already had. |  | 
| 1556                 break; |  | 
| 1557             } |  | 
| 1558 |  | 
| 1559             // Some normalization happened.  Next loop iteration will pick up a 
      char |  | 
| 1560             //   from the normalization buffer. |  | 
| 1561 |  | 
| 1562         }   // end for (;;) |  | 
| 1563 |  | 
| 1564 |  | 
| 1565         if (ch <= 0xFF) { |  | 
| 1566             /*  For latin-1 characters we never need to fall back to the UCA tab
      le        */ |  | 
| 1567             /*    because all of the UCA data is replicated in the latinOneMappi
      ng array  */ |  | 
| 1568             order = coll->latinOneMapping[ch]; |  | 
| 1569             if (order > UCOL_NOT_FOUND) { |  | 
| 1570                 order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, 
      status); |  | 
| 1571             } |  | 
| 1572         } |  | 
| 1573         else |  | 
| 1574         { |  | 
| 1575             // Always use UCA for Han, Hangul |  | 
| 1576             // (Han extension A is before main Han block) |  | 
| 1577             // **** Han compatibility chars ?? **** |  | 
| 1578             if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 && |  | 
| 1579                 (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) { |  | 
| 1580                 if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) { |  | 
| 1581                     // between the two target ranges; do normal lookup |  | 
| 1582                     // **** this range is YI, Modifier tone letters, **** |  | 
| 1583                     // **** Latin-D, Syloti Nagari, Phagas-pa.       **** |  | 
| 1584                     // **** Latin-D might be tailored, so we need to **** |  | 
| 1585                     // **** do the normal lookup for these guys.     **** |  | 
| 1586                     order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |  | 
| 1587                 } else { |  | 
| 1588                     // in one of the target ranges; use UCA |  | 
| 1589                     order = UCOL_NOT_FOUND; |  | 
| 1590                 } |  | 
| 1591             } else { |  | 
| 1592                 order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |  | 
| 1593             } |  | 
| 1594 |  | 
| 1595             if(order > UCOL_NOT_FOUND) {                                       /
      * if a CE is special                */ |  | 
| 1596                 order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, 
      status);    /* and try to get the special CE     */ |  | 
| 1597             } |  | 
| 1598 |  | 
| 1599             if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a g
      ood CE in the tailoring */ |  | 
| 1600                 /* if we got here, the codepoint MUST be over 0xFF - so we look 
      directly in the trie */ |  | 
| 1601                 order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); |  | 
| 1602 |  | 
| 1603                 if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE *
      / |  | 
| 1604                     order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collatio
      nSource, status); |  | 
| 1605                 } |  | 
| 1606             } |  | 
| 1607         } |  | 
| 1608     } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_L
      AST_HANGUL ); |  | 
| 1609 |  | 
| 1610     if(order == UCOL_NOT_FOUND) { |  | 
| 1611         order = getImplicit(ch, collationSource); |  | 
| 1612     } |  | 
| 1613     return order; /* return the CE */ |  | 
| 1614 } |  | 
| 1615 |  | 
| 1616 /* ucol_getNextCE, out-of-line version for use from other files.   */ |  | 
| 1617 U_CAPI uint32_t  U_EXPORT2 |  | 
| 1618 ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *
      status) { |  | 
| 1619     return ucol_IGetNextCE(coll, collationSource, status); |  | 
| 1620 } |  | 
| 1621 |  | 
| 1622 |  | 
| 1623 /** |  | 
| 1624 * Incremental previous normalization happens here. Pick up the range of chars |  | 
| 1625 * identifed by FCD, normalize it into the collIterate's writable buffer, |  | 
| 1626 * switch the collIterate's state to use the writable buffer. |  | 
| 1627 * @param data collation iterator data |  | 
| 1628 */ |  | 
| 1629 static |  | 
| 1630 void collPrevIterNormalize(collIterate *data) |  | 
| 1631 { |  | 
| 1632     UErrorCode status  = U_ZERO_ERROR; |  | 
| 1633     const UChar *pEnd   = data->pos;  /* End normalize + 1 */ |  | 
| 1634     const UChar *pStart; |  | 
| 1635 |  | 
| 1636     /* Start normalize */ |  | 
| 1637     if (data->fcdPosition == NULL) { |  | 
| 1638         pStart = data->string; |  | 
| 1639     } |  | 
| 1640     else { |  | 
| 1641         pStart = data->fcdPosition + 1; |  | 
| 1642     } |  | 
| 1643 |  | 
| 1644     int32_t normLen = |  | 
| 1645         data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pSta
      rt) + 1)), |  | 
| 1646                              data->writableBuffer, |  | 
| 1647                              status). |  | 
| 1648         length(); |  | 
| 1649     if(U_FAILURE(status)) { |  | 
| 1650         return; |  | 
| 1651     } |  | 
| 1652     /* |  | 
| 1653     this puts the null termination infront of the normalized string instead |  | 
| 1654     of the end |  | 
| 1655     */ |  | 
| 1656     data->writableBuffer.insert(0, (UChar)0); |  | 
| 1657 |  | 
| 1658     /* |  | 
| 1659      * The usual case at this point is that we've got a base |  | 
| 1660      * character followed by marks that were normalized. If |  | 
| 1661      * fcdPosition is NULL, that means that we backed up to |  | 
| 1662      * the beginning of the string and there's no base character. |  | 
| 1663      * |  | 
| 1664      * Forward processing will usually normalize when it sees |  | 
| 1665      * the first mark, so that mark will get it's natural offset |  | 
| 1666      * and the rest will get the offset of the character following |  | 
| 1667      * the marks. The base character will also get its natural offset. |  | 
| 1668      * |  | 
| 1669      * We write the offset of the base character, if there is one, |  | 
| 1670      * followed by the offset of the first mark and then the offsets |  | 
| 1671      * of the rest of the marks. |  | 
| 1672      */ |  | 
| 1673     int32_t firstMarkOffset = 0; |  | 
| 1674     int32_t trailOffset     = (int32_t)(data->pos - data->string + 1); |  | 
| 1675     int32_t trailCount      = normLen - 1; |  | 
| 1676 |  | 
| 1677     if (data->fcdPosition != NULL) { |  | 
| 1678         int32_t baseOffset = (int32_t)(data->fcdPosition - data->string); |  | 
| 1679         UChar   baseChar   = *data->fcdPosition; |  | 
| 1680 |  | 
| 1681         firstMarkOffset = baseOffset + 1; |  | 
| 1682 |  | 
| 1683         /* |  | 
| 1684          * If the base character is the start of a contraction, forward processi
      ng |  | 
| 1685          * will normalize the marks while checking for the contraction, which me
      ans |  | 
| 1686          * that the offset of the first mark will the same as the other marks. |  | 
| 1687          * |  | 
| 1688          * **** THIS IS PROBABLY NOT A COMPLETE TEST **** |  | 
| 1689          */ |  | 
| 1690         if (baseChar >= 0x100) { |  | 
| 1691             uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, bas
      eChar); |  | 
| 1692 |  | 
| 1693             if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) { |  | 
| 1694                 baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, bas
      eChar); |  | 
| 1695             } |  | 
| 1696 |  | 
| 1697             if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION
      _TAG) { |  | 
| 1698                 firstMarkOffset = trailOffset; |  | 
| 1699             } |  | 
| 1700         } |  | 
| 1701 |  | 
| 1702         data->appendOffset(baseOffset, status); |  | 
| 1703     } |  | 
| 1704 |  | 
| 1705     data->appendOffset(firstMarkOffset, status); |  | 
| 1706 |  | 
| 1707     for (int32_t i = 0; i < trailCount; i += 1) { |  | 
| 1708         data->appendOffset(trailOffset, status); |  | 
| 1709     } |  | 
| 1710 |  | 
| 1711     data->offsetRepeatValue = trailOffset; |  | 
| 1712 |  | 
| 1713     data->offsetReturn = data->offsetStore - 1; |  | 
| 1714     if (data->offsetReturn == data->offsetBuffer) { |  | 
| 1715         data->offsetStore = data->offsetBuffer; |  | 
| 1716     } |  | 
| 1717 |  | 
| 1718     data->pos        = data->writableBuffer.getTerminatedBuffer() + 1 + normLen; |  | 
| 1719     data->origFlags  = data->flags; |  | 
| 1720     data->flags     |= UCOL_ITER_INNORMBUF; |  | 
| 1721     data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); |  | 
| 1722 } |  | 
| 1723 |  | 
| 1724 |  | 
| 1725 /** |  | 
| 1726 * Incremental FCD check for previous iteration and normalize. Called from |  | 
| 1727 * getPrevCE when normalization state is suspect. |  | 
| 1728 * When entering, the state is known to be this: |  | 
| 1729 * o  We are working in the main buffer of the collIterate, not the side |  | 
| 1730 *    writable buffer. When in the side buffer, normalization mode is always |  | 
| 1731 *    off, so we won't get here. |  | 
| 1732 * o  The leading combining class from the current character is 0 or the |  | 
| 1733 *    trailing combining class of the previous char was zero. |  | 
| 1734 *    True because the previous call to this function will have always exited |  | 
| 1735 *    that way, and we get called for every char where cc might be non-zero. |  | 
| 1736 * @param data collation iterate struct |  | 
| 1737 * @return normalization status, TRUE for normalization to be done, FALSE |  | 
| 1738 *         otherwise |  | 
| 1739 */ |  | 
| 1740 static |  | 
| 1741 inline UBool collPrevIterFCD(collIterate *data) |  | 
| 1742 { |  | 
| 1743     const UChar *src, *start; |  | 
| 1744     uint8_t     leadingCC; |  | 
| 1745     uint8_t     trailingCC = 0; |  | 
| 1746     uint16_t    fcd; |  | 
| 1747     UBool       result = FALSE; |  | 
| 1748 |  | 
| 1749     start = data->string; |  | 
| 1750     src = data->pos + 1; |  | 
| 1751 |  | 
| 1752     /* Get the trailing combining class of the current character. */ |  | 
| 1753     fcd = g_nfcImpl->previousFCD16(start, src); |  | 
| 1754 |  | 
| 1755     leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); |  | 
| 1756 |  | 
| 1757     if (leadingCC != 0) { |  | 
| 1758         /* |  | 
| 1759         The current char has a non-zero leading combining class. |  | 
| 1760         Scan backward until we find a char with a trailing cc of zero. |  | 
| 1761         */ |  | 
| 1762         for (;;) |  | 
| 1763         { |  | 
| 1764             if (start == src) { |  | 
| 1765                 data->fcdPosition = NULL; |  | 
| 1766                 return result; |  | 
| 1767             } |  | 
| 1768 |  | 
| 1769             fcd = g_nfcImpl->previousFCD16(start, src); |  | 
| 1770 |  | 
| 1771             trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_); |  | 
| 1772 |  | 
| 1773             if (trailingCC == 0) { |  | 
| 1774                 break; |  | 
| 1775             } |  | 
| 1776 |  | 
| 1777             if (leadingCC < trailingCC) { |  | 
| 1778                 result = TRUE; |  | 
| 1779             } |  | 
| 1780 |  | 
| 1781             leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_); |  | 
| 1782         } |  | 
| 1783     } |  | 
| 1784 |  | 
| 1785     data->fcdPosition = (UChar *)src; |  | 
| 1786 |  | 
| 1787     return result; |  | 
| 1788 } |  | 
| 1789 |  | 
| 1790 /** gets a code unit from the string at a given offset |  | 
| 1791  *  Handles both normal and iterative cases. |  | 
| 1792  *  No error checking - caller beware! |  | 
| 1793  */ |  | 
| 1794 static inline |  | 
| 1795 UChar peekCodeUnit(collIterate *source, int32_t offset) { |  | 
| 1796     if(source->pos != NULL) { |  | 
| 1797         return *(source->pos + offset); |  | 
| 1798     } else if(source->iterator != NULL) { |  | 
| 1799         UChar32 c; |  | 
| 1800         if(offset != 0) { |  | 
| 1801             source->iterator->move(source->iterator, offset, UITER_CURRENT); |  | 
| 1802             c = source->iterator->next(source->iterator); |  | 
| 1803             source->iterator->move(source->iterator, -offset-1, UITER_CURRENT); |  | 
| 1804         } else { |  | 
| 1805             c = source->iterator->current(source->iterator); |  | 
| 1806         } |  | 
| 1807         return c >= 0 ? (UChar)c : 0xfffd;  // If the caller works properly, we 
      should never see c<0. |  | 
| 1808     } else { |  | 
| 1809         return 0xfffd; |  | 
| 1810     } |  | 
| 1811 } |  | 
| 1812 |  | 
| 1813 // Code point version. Treats the offset as a _code point_ delta. |  | 
| 1814 // We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-for
      med UTF-16. |  | 
| 1815 // We cannot use U16_FWD_1 and similar because we do not know the start and limi
      t of the buffer. |  | 
| 1816 static inline |  | 
| 1817 UChar32 peekCodePoint(collIterate *source, int32_t offset) { |  | 
| 1818     UChar32 c; |  | 
| 1819     if(source->pos != NULL) { |  | 
| 1820         const UChar *p = source->pos; |  | 
| 1821         if(offset >= 0) { |  | 
| 1822             // Skip forward over (offset-1) code points. |  | 
| 1823             while(--offset >= 0) { |  | 
| 1824                 if(U16_IS_LEAD(*p++) && U16_IS_TRAIL(*p)) { |  | 
| 1825                     ++p; |  | 
| 1826                 } |  | 
| 1827             } |  | 
| 1828             // Read the code point there. |  | 
| 1829             c = *p++; |  | 
| 1830             UChar trail; |  | 
| 1831             if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) { |  | 
| 1832                 c = U16_GET_SUPPLEMENTARY(c, trail); |  | 
| 1833             } |  | 
| 1834         } else /* offset<0 */ { |  | 
| 1835             // Skip backward over (offset-1) code points. |  | 
| 1836             while(++offset < 0) { |  | 
| 1837                 if(U16_IS_TRAIL(*--p) && U16_IS_LEAD(*(p - 1))) { |  | 
| 1838                     --p; |  | 
| 1839                 } |  | 
| 1840             } |  | 
| 1841             // Read the code point before that. |  | 
| 1842             c = *--p; |  | 
| 1843             UChar lead; |  | 
| 1844             if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) { |  | 
| 1845                 c = U16_GET_SUPPLEMENTARY(lead, c); |  | 
| 1846             } |  | 
| 1847         } |  | 
| 1848     } else if(source->iterator != NULL) { |  | 
| 1849         if(offset >= 0) { |  | 
| 1850             // Skip forward over (offset-1) code points. |  | 
| 1851             int32_t fwd = offset; |  | 
| 1852             while(fwd-- > 0) { |  | 
| 1853                 uiter_next32(source->iterator); |  | 
| 1854             } |  | 
| 1855             // Read the code point there. |  | 
| 1856             c = uiter_current32(source->iterator); |  | 
| 1857             // Return to the starting point, skipping backward over (offset-1) c
      ode points. |  | 
| 1858             while(offset-- > 0) { |  | 
| 1859                 uiter_previous32(source->iterator); |  | 
| 1860             } |  | 
| 1861         } else /* offset<0 */ { |  | 
| 1862             // Read backward, reading offset code points, remember only the last
      -read one. |  | 
| 1863             int32_t back = offset; |  | 
| 1864             do { |  | 
| 1865                 c = uiter_previous32(source->iterator); |  | 
| 1866             } while(++back < 0); |  | 
| 1867             // Return to the starting position, skipping forward over offset cod
      e points. |  | 
| 1868             do { |  | 
| 1869                 uiter_next32(source->iterator); |  | 
| 1870             } while(++offset < 0); |  | 
| 1871         } |  | 
| 1872     } else { |  | 
| 1873         c = U_SENTINEL; |  | 
| 1874     } |  | 
| 1875     return c; |  | 
| 1876 } |  | 
| 1877 |  | 
| 1878 /** |  | 
| 1879 * Determines if we are at the start of the data string in the backwards |  | 
| 1880 * collation iterator |  | 
| 1881 * @param data collation iterator |  | 
| 1882 * @return TRUE if we are at the start |  | 
| 1883 */ |  | 
| 1884 static |  | 
| 1885 inline UBool isAtStartPrevIterate(collIterate *data) { |  | 
| 1886     if(data->pos == NULL && data->iterator != NULL) { |  | 
| 1887         return !data->iterator->hasPrevious(data->iterator); |  | 
| 1888     } |  | 
| 1889     //return (collIter_bos(data)) || |  | 
| 1890     return (data->pos == data->string) || |  | 
| 1891               ((data->flags & UCOL_ITER_INNORMBUF) && (data->pos != NULL) && |  | 
| 1892               *(data->pos - 1) == 0 && data->fcdPosition == NULL); |  | 
| 1893 } |  | 
| 1894 |  | 
| 1895 static |  | 
| 1896 inline void goBackOne(collIterate *data) { |  | 
| 1897 # if 0 |  | 
| 1898     // somehow, it looks like we need to keep iterator synced up |  | 
| 1899     // at all times, as above. |  | 
| 1900     if(data->pos) { |  | 
| 1901         data->pos--; |  | 
| 1902     } |  | 
| 1903     if(data->iterator) { |  | 
| 1904         data->iterator->previous(data->iterator); |  | 
| 1905     } |  | 
| 1906 #endif |  | 
| 1907     if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) { |  | 
| 1908         data->iterator->previous(data->iterator); |  | 
| 1909     } |  | 
| 1910     if(data->pos) { |  | 
| 1911         data->pos --; |  | 
| 1912     } |  | 
| 1913 } |  | 
| 1914 |  | 
/**
* Inline function that gets the previous collation element (CE) when iterating
* backwards.
* It first steps the offset bookkeeping (offsetReturn / offsetRepeatCount) back
* by one. Then, if the CE buffer still holds CEs that have not been returned
* (toReturn is above the start of extendCEs, or of CEs when extendCEs is not
* set), toReturn is decremented and the CE it then points to is returned.
* Otherwise the previous code unit is fetched from the source string, the
* character iterator or the normalization side buffer, checked for FCD and
* normalized if necessary, and then looked up. Special CEs are resolved through
* ucol_prv_getSpecialPrevCE, and characters that stay UCOL_NOT_FOUND are handed
* to getPrevImplicit.
* @param coll collator data
* @param data collation iterator struct
* @param status error status
* @return the previous CE, or UCOL_NO_MORE_CES when there is no text left
*         before the current position
*/
static
inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
                               UErrorCode *status)
{
    uint32_t result = (uint32_t)UCOL_NULLORDER;

    /* Step the offset bookkeeping back by one before producing a CE. */
    if (data->offsetReturn != NULL) {
        if (data->offsetRepeatCount > 0) {
                data->offsetRepeatCount -= 1;
        } else {
            if (data->offsetReturn == data->offsetBuffer) {
                /* Back at the first entry: clear offsetReturn and rewind
                   offsetStore to the start of offsetBuffer. */
                data->offsetReturn = NULL;
                data->offsetStore  = data->offsetBuffer;
            } else {
                data->offsetReturn -= 1;
            }
        }
    }

    /* CEs are still buffered: return the one just below toReturn. */
    if ((data->extendCEs && data->toReturn > data->extendCEs) ||
            (!data->extendCEs && data->toReturn > data->CEs))
    {
        data->toReturn -= 1;
        result = *(data->toReturn);
        if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
            /* toReturn reached the start of the buffer; move CEpos there too. */
            data->CEpos = data->toReturn;
        }
    }
    else {
        UChar ch = 0;

        /* The outer loop repeats while the lookup yields UCOL_IGNORABLE for a
           code unit in the range UCOL_FIRST_HANGUL..UCOL_LAST_HANGUL. */
        do {
            /*
            Loop handles case when incremental normalize switches to or from the
            side buffer / original string, and we need to start again to get the
            next character.
            */
            for (;;) {
                if (data->flags & UCOL_ITER_HASLEN) {
                    /*
                    Normal path for strings when length is specified.
                    Not in side buffer because it is always null terminated.
                    */
                    if (data->pos <= data->string) {
                        /* End of the main source string */
                        return UCOL_NO_MORE_CES;
                    }
                    data->pos --;
                    ch = *data->pos;
                }
                // Going backwards through a character iterator; U_SENTINEL
                // from previous() means there is nothing left before us.
                else if (data->flags & UCOL_USE_ITERATOR) {
                  UChar32 iterCh = data->iterator->previous(data->iterator);
                  if(iterCh == U_SENTINEL) {
                    return UCOL_NO_MORE_CES;
                  } else {
                    ch = (UChar)iterCh;
                  }
                }
                else {
                    data->pos --;
                    ch = *data->pos;
                    /* we are in the side buffer. */
                    if (ch == 0) {
                        /*
                        At the start of the normalize side buffer.
                        Go back to string.
                        Because pointer points to the last accessed character,
                        hence we have to increment it by one here.
                        */
                        data->flags = data->origFlags;
                        data->offsetRepeatValue = 0;

                         if (data->fcdPosition == NULL) {
                            /* Nothing to return to in the string: the text
                               is exhausted. */
                            data->pos = data->string;
                            return UCOL_NO_MORE_CES;
                        }
                        else {
                            data->pos   = data->fcdPosition + 1;
                        }

                       /* Fetch again, this time with the restored flags. */
                       continue;
                    }
                }

                /* Record whether the code unit just read is in the range
                   0x3040..0x309f, but only when UCOL_HIRAGANA_Q is set. */
                if(data->flags&UCOL_HIRAGANA_Q) {
                  if(ch>=0x3040 && ch<=0x309f) {
                    data->flags |= UCOL_WAS_HIRAGANA;
                  } else {
                    data->flags &= ~UCOL_WAS_HIRAGANA;
                  }
                }

                /*
                * got a character to determine if there's fcd and/or normalization
                * stuff to do.
                * if the current character is not fcd.
                * if current character is at the start of the string
                * Trailing combining class == 0.
                * Note if pos is in the writablebuffer, norm is always 0
                */
                if (ch < ZERO_CC_LIMIT_ ||
                  // this should propel us out of the loop in the iterator case
                    (data->flags & UCOL_ITER_NORM) == 0 ||
                    (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
                    || data->string == data->pos) {
                    break;
                }

                if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
                    /* if next character is FCD */
                    if (data->pos == data->string) {
                        /* First char of string is always OK for FCD check */
                        break;
                    }

                    /* Not first char of string, do the FCD fast test */
                    if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
                        break;
                    }
                }

                /* Need a more complete FCD check and possible normalization. */
                if (collPrevIterFCD(data)) {
                    collPrevIterNormalize(data);
                }

                if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
                    /*  No normalization. Go ahead and process the char. */
                    break;
                }

                /*
                Some normalization happened.
                Next loop picks up a char from the normalization buffer.
                */
            }

            /* attempt to handle contractions, after removal of the backwards
            contraction
            */
            if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
                result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
            } else {
                if (ch <= 0xFF) {
                    /* Code units up to 0xFF are read from the latinOneMapping
                       table. */
                    result = coll->latinOneMapping[ch];
                }
                else {
                    // Always use UCA for [3400..9FFF], [AC00..D7AF]
                    // **** [FA0E..FA2F] ?? ****
                    if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
                        (ch >= 0x3400 && ch <= 0xD7AF)) {
                        if (ch > 0x9FFF && ch < 0xAC00) {
                            // between the two target ranges; do normal lookup
                            // **** this range is YI, Modifier tone letters, ****
                            // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
                            // **** Latin-D might be tailored, so we need to ****
                            // **** do the normal lookup for these guys.     ****
                             result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
                        } else {
                            result = UCOL_NOT_FOUND;
                        }
                    } else {
                        result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
                    }
                }
                /* Values above UCOL_NOT_FOUND are special CEs that need
                   further resolution. */
                if (result > UCOL_NOT_FOUND) {
                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
                }
                if (result == UCOL_NOT_FOUND) { // Not found in master list
                    /* NOTE(review): this test uses data->coll while the one
                       above uses coll - confirm the difference is intended. */
                    if (!isAtStartPrevIterate(data) &&
                        ucol_contractionEndCP(ch, data->coll))
                    {
                        result = UCOL_CONTRACTION;
                    } else {
                        if(coll->UCA) {
                            result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
                        }
                    }

                    if (result > UCOL_NOT_FOUND) {
                        if(coll->UCA) {
                            result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
                        }
                    }
                }
            }
        } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );

        /* Still unmapped after all lookups: build the CE via getPrevImplicit. */
        if(result == UCOL_NOT_FOUND) {
            result = getPrevImplicit(ch, data);
        }
    }

    return result;
}
| 2122 |  | 
| 2123 |  | 
| 2124 /*   ucol_getPrevCE, out-of-line version for use from other files.  */ |  | 
| 2125 U_CFUNC uint32_t  U_EXPORT2 |  | 
| 2126 ucol_getPrevCE(const UCollator *coll, collIterate *data, |  | 
| 2127                         UErrorCode *status) { |  | 
| 2128     return ucol_IGetPrevCE(coll, data, status); |  | 
| 2129 } |  | 
| 2130 |  | 
| 2131 |  | 
| 2132 /* this should be connected to special Jamo handling */ |  | 
| 2133 U_CFUNC uint32_t  U_EXPORT2 |  | 
| 2134 ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) { |  | 
| 2135     collIterate colIt; |  | 
| 2136     IInit_collIterate(coll, &u, 1, &colIt, status); |  | 
| 2137     if(U_FAILURE(*status)) { |  | 
| 2138         return 0; |  | 
| 2139     } |  | 
| 2140     return ucol_IGetNextCE(coll, &colIt, status); |  | 
| 2141 } |  | 
| 2142 |  | 
| 2143 /** |  | 
| 2144 * Inserts the argument character into the end of the buffer pushing back the |  | 
| 2145 * null terminator. |  | 
| 2146 * @param data collIterate struct data |  | 
| 2147 * @param ch character to be appended |  | 
| 2148 * @return the position of the new addition |  | 
| 2149 */ |  | 
| 2150 static |  | 
| 2151 inline const UChar * insertBufferEnd(collIterate *data, UChar ch) |  | 
| 2152 { |  | 
| 2153     int32_t oldLength = data->writableBuffer.length(); |  | 
| 2154     return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength; |  | 
| 2155 } |  | 
| 2156 |  | 
| 2157 /** |  | 
| 2158 * Inserts the argument string into the end of the buffer pushing back the |  | 
| 2159 * null terminator. |  | 
| 2160 * @param data collIterate struct data |  | 
| 2161 * @param string to be appended |  | 
| 2162 * @param length of the string to be appended |  | 
| 2163 * @return the position of the new addition |  | 
| 2164 */ |  | 
| 2165 static |  | 
| 2166 inline const UChar * insertBufferEnd(collIterate *data, const UChar *str, int32_
      t length) |  | 
| 2167 { |  | 
| 2168     int32_t oldLength = data->writableBuffer.length(); |  | 
| 2169     return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldL
      ength; |  | 
| 2170 } |  | 
| 2171 |  | 
| 2172 /** |  | 
| 2173 * Special normalization function for contraction in the forwards iterator. |  | 
| 2174 * This normalization sequence will place the current character at source->pos |  | 
| 2175 * and its following normalized sequence into the buffer. |  | 
| 2176 * The fcd position, pos will be changed. |  | 
| 2177 * pos will now point to positions in the buffer. |  | 
| 2178 * Flags will be changed accordingly. |  | 
| 2179 * @param data collation iterator data |  | 
| 2180 */ |  | 
| 2181 static |  | 
| 2182 inline void normalizeNextContraction(collIterate *data) |  | 
| 2183 { |  | 
| 2184     int32_t     strsize; |  | 
| 2185     UErrorCode  status     = U_ZERO_ERROR; |  | 
| 2186     /* because the pointer points to the next character */ |  | 
| 2187     const UChar *pStart    = data->pos - 1; |  | 
| 2188     const UChar *pEnd; |  | 
| 2189 |  | 
| 2190     if ((data->flags & UCOL_ITER_INNORMBUF) == 0) { |  | 
| 2191         data->writableBuffer.setTo(*(pStart - 1)); |  | 
| 2192         strsize               = 1; |  | 
| 2193     } |  | 
| 2194     else { |  | 
| 2195         strsize = data->writableBuffer.length(); |  | 
| 2196     } |  | 
| 2197 |  | 
| 2198     pEnd = data->fcdPosition; |  | 
| 2199 |  | 
| 2200     data->writableBuffer.append( |  | 
| 2201         data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStar
      t)), status)); |  | 
| 2202     if(U_FAILURE(status)) { |  | 
| 2203         return; |  | 
| 2204     } |  | 
| 2205 |  | 
| 2206     data->pos        = data->writableBuffer.getTerminatedBuffer() + strsize; |  | 
| 2207     data->origFlags  = data->flags; |  | 
| 2208     data->flags     |= UCOL_ITER_INNORMBUF; |  | 
| 2209     data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); |  | 
| 2210 } |  | 
| 2211 |  | 
/**
* Contraction character management function that returns the next character
* for the forwards iterator.
* Without normalization and outside the normalization buffer, the next code
* unit is simply read from the character iterator or from pos.
* If the next character is already available (inside the normalization buffer
* and not at its terminating 0, or in the string before fcdPosition), it is
* returned without further work.
* Otherwise the next character in the data string is checked to see whether
* it needs normalization. If it does not, the character is returned and, when
* working in the normalization buffer, also appended to it; if it does, the
* whole normalized substring is appended to the buffer, including the current
* character.
* @param data collation element iterator data
* @return next character; (UChar)-1 (U+FFFF) is returned to signal an error,
*         e.g. when pos is already at or past endp for a string with length
*/
static
inline UChar getNextNormalizedChar(collIterate *data)
{
    UChar  nextch;
    UChar  ch;
    // Here we need to add the iterator code. One problem is the way
    // end of string is handled. If we just return next char, it could
    // be the sentinel. Most of the cases already check for this, but we
    // need to be sure.
    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ) {
         /* if no normalization and not in buffer. */
      if(data->flags & UCOL_USE_ITERATOR) {
         return (UChar)data->iterator->next(data->iterator);
      } else {
         return *(data->pos ++);
      }
    }

    /* Non-zero when pos currently points into the normalization buffer. */
    UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
    if ((innormbuf && *data->pos != 0) ||
        (data->fcdPosition != NULL && !innormbuf &&
        data->pos < data->fcdPosition)) {
        /*
        if next character is in normalized buffer, no further normalization
        is required
        */
        return *(data->pos ++);
    }

    if (data->flags & UCOL_ITER_HASLEN) {
        /* in data string */
        if (data->pos + 1 == data->endp) {
            /* Last code unit of the string: nothing follows it to check. */
            return *(data->pos ++);
        }
        if (data->pos >= data->endp) {
            return (UChar) -1; // return U+FFFF (non-char) to indicate an error
        }
    }
    else {
        if (innormbuf) {
          // inside the normalization buffer, but at the end
          // (since we encountered zero). Continue either from
          // fcdPosition in the string or from the character iterator.
            /*
            in writable buffer, at this point fcdPosition can not be
            pointing to the end of the data string. see contracting tag.
            */
          if(data->fcdPosition) {
            if (*(data->fcdPosition + 1) == 0 ||
                data->fcdPosition + 1 == data->endp) {
                /* at the end of the string, dump it into the normalizer */
                data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1;
                // Check if data->pos received a null pointer
                // NOTE(review): pos is the result of "pointer + 1" here, so
                // this test looks unable to fire - confirm.
                if (data->pos == NULL) {
                    return (UChar)-1; // Return to indicate error.
                }
                return *(data->fcdPosition ++);
            }
            /* Resume reading the string at fcdPosition. */
            data->pos = data->fcdPosition;
          } else if(data->origFlags & UCOL_USE_ITERATOR) {
            // if we are here, we're using a normalizing iterator.
            // we should just continue further.
            data->flags = data->origFlags;
            data->pos = NULL;
            return (UChar)data->iterator->next(data->iterator);
          }
        }
        else {
            if (*(data->pos + 1) == 0) {
                /* Last code unit before the terminating 0. */
                return *(data->pos ++);
            }
        }
    }

    /* Read the current code unit and look at the one that follows it. */
    ch = *data->pos ++;
    nextch = *data->pos;

    /*
    * if the current character is not fcd.
    * Trailing combining class == 0.
    */
    if ((data->fcdPosition == NULL || data->fcdPosition < data->pos) &&
        (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ ||
         ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) {
            /*
            Need a more complete FCD check and possible normalization.
            normalize substring will be appended to buffer
            */
        if (collIterFCD(data)) {
            normalizeNextContraction(data);
            return *(data->pos ++);
        }
        else if (innormbuf) {
            /* fcdposition shifted even when there's no normalization, if we
            don't input the rest into this, we'll get the wrong position when
            we reach the end of the writableBuffer */
            int32_t length = (int32_t)(data->fcdPosition - data->pos + 1);
            data->pos = insertBufferEnd(data, data->pos - 1, length);
            // Check if data->pos received a null pointer
            if (data->pos == NULL) {
                return (UChar)-1; // Return to indicate error.
            }
            return *(data->pos ++);
        }
    }

    if (innormbuf) {
        /*
        no normalization is to be done hence only one character will be
        appended to the buffer.
        */
        data->pos = insertBufferEnd(data, ch) + 1;
        // Check if data->pos received a null pointer
        // NOTE(review): as above, pos comes from "pointer + 1" - confirm
        // whether this test can ever be true.
        if (data->pos == NULL) {
            return (UChar)-1; // Return to indicate error.
        }
    }

    /* points back to the pos in string */
    return ch;
}
| 2358 |  | 
| 2359 |  | 
| 2360 |  | 
| 2361 /** |  | 
| 2362 * Function to copy the buffer into writableBuffer and sets the fcd position to |  | 
| 2363 * the correct position |  | 
| 2364 * @param source data string source |  | 
| 2365 * @param buffer character buffer |  | 
| 2366 */ |  | 
| 2367 static |  | 
| 2368 inline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &b
      uffer) |  | 
| 2369 { |  | 
| 2370     /* okay confusing part here. to ensure that the skipped characters are |  | 
| 2371     considered later, we need to place it in the appropriate position in the |  | 
| 2372     normalization buffer and reassign the pos pointer. simple case if pos |  | 
| 2373     reside in string, simply copy to normalization buffer and |  | 
| 2374     fcdposition = pos, pos = start of normalization buffer. if pos in |  | 
| 2375     normalization buffer, we'll insert the copy infront of pos and point pos |  | 
| 2376     to the start of the normalization buffer. why am i doing these copies? |  | 
| 2377     well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecial
      CE does |  | 
| 2378     not require any changes, which be really painful. */ |  | 
| 2379     if (source->flags & UCOL_ITER_INNORMBUF) { |  | 
| 2380         int32_t replaceLength = source->pos - source->writableBuffer.getBuffer()
      ; |  | 
| 2381         source->writableBuffer.replace(0, replaceLength, buffer); |  | 
| 2382     } |  | 
| 2383     else { |  | 
| 2384         source->fcdPosition  = source->pos; |  | 
| 2385         source->origFlags    = source->flags; |  | 
| 2386         source->flags       |= UCOL_ITER_INNORMBUF; |  | 
| 2387         source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_IT
      ERATOR); |  | 
| 2388         source->writableBuffer = buffer; |  | 
| 2389     } |  | 
| 2390 |  | 
| 2391     source->pos = source->writableBuffer.getTerminatedBuffer(); |  | 
| 2392 } |  | 
| 2393 |  | 
/**
* Function to get the discontiguous collation element within the source.
* It reads further characters with getNextNormalizedChar and looks each one up
* in the contraction table that starts at constart. Characters that do not
* match are collected in a side buffer; on a successful match they are handed
* to setDiscontiguosAttribute so that they are processed afterwards.
* Note this function will set the position to the appropriate places: on
* success pos points into the normalization buffer, otherwise the iterator is
* restored to the state saved on entry and moved back by one.
* @param coll current collator used
* @param source data string source
* @param constart index to the start character in the contraction table
* @return the CE read from coll->contractionCEs for the matched table entry,
*         or the CE stored for the contraction start when no match is found
*/
static
uint32_t getDiscontiguous(const UCollator *coll, collIterate *source,
                                const UChar *constart)
{
    /* source->pos currently points to the second combining character after
       the start character */
          const UChar *temppos      = source->pos;
          UnicodeString buffer;
    const UChar   *tempconstart = constart;
          uint8_t  tempflags    = source->flags;
          UBool    multicontraction = FALSE;
          collIterateState discState;

          /* Saved so that the iterator can be reverted if nothing matches. */
          backupState(source, &discState);

    /* The side buffer starts with the code point just before pos. */
    buffer.setTo(peekCodePoint(source, -1));
    for (;;) {
        UChar    *UCharOffset;
        UChar     schar,
                  tchar;
        uint32_t  result;

        /* Stop when the text ends or the next code point has combining
           class 0. */
        if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp)
            || (peekCodeUnit(source, 0) == 0  &&
                ((source->flags & UCOL_ITER_INNORMBUF) == 0 ||
                 source->fcdPosition == NULL ||
                 source->fcdPosition == source->endp ||
                 *(source->fcdPosition) == 0 ||
                 u_getCombiningClass(*(source->fcdPosition)) == 0)) ||
                 /* end of string in null terminated string or stopped by a
                 null character, note fcd does not always point to a base
                 character after the discontiguous change */
                 u_getCombiningClass(peekCodePoint(source, 0)) == 0) {
            if (multicontraction) {
                /* A multi-contraction prefix matched earlier: go back to the
                   position recorded for it and return the CE of its table. */
                source->pos    = temppos - 1;
                setDiscontiguosAttribute(source, buffer);
                return *(coll->contractionCEs +
                                    (tempconstart - coll->contractionIndex));
            }
            constart = tempconstart;
            break;
        }

        UCharOffset = (UChar *)(tempconstart + 1); /* skip the backward offset*/
        schar = getNextNormalizedChar(source);

        /* Walk the table until an entry is not smaller than schar. */
        while (schar > (tchar = *UCharOffset)) {
            UCharOffset++;
        }

        if (schar != tchar) {
            /* not the correct codepoint. we stuff the current codepoint into
            the discontiguous buffer and try the next character */
            buffer.append(schar);
            continue;
        }
        else {
            /* A table match is skipped when schar has the same combining
               class as the code point peekCodePoint(source, -2) returns. */
            if (u_getCombiningClass(schar) ==
                u_getCombiningClass(peekCodePoint(source, -2))) {
                buffer.append(schar);
                continue;
            }
            result = *(coll->contractionCEs +
                                      (UCharOffset - coll->contractionIndex));
        }

        if (result == UCOL_NOT_FOUND) {
          break;
        } else if (isContraction(result)) {
            /* this is a multi-contraction*/
            tempconstart = (UChar *)coll->image + getContractOffset(result);
            if (*(coll->contractionCEs + (constart - coll->contractionIndex))
                != UCOL_NOT_FOUND) {
                multicontraction = TRUE;
                temppos       = source->pos + 1;
            }
        } else {
            /* Complete match: queue the skipped characters and return the CE. */
            setDiscontiguosAttribute(source, buffer);
            return result;
        }
    }

    /* no problems simply reverting just like that,
    if we are in string before getting into this function, points back to
    string hence no problem.
    if we are in normalization buffer before getting into this function,
    since we'll never use another normalization within this function, we
    know that fcdposition points to a base character. the normalization buffer
    never change, hence this revert works. */
    loadState(source, &discState, TRUE);
    goBackOne(source);

    source->flags = tempflags;
    return *(coll->contractionCEs + (constart - coll->contractionIndex));
}
| 2501 |  | 
| 2502 /* now uses Mark's getImplicitPrimary code. |  | 
|      * Builds the two collation elements for a code point handled implicitly. |  | 
|      * The implicit primary r is obtained from uprv_uca_getImplicitPrimary(cp). |  | 
|      * The low 16 bits of r, shifted into the upper half and OR-ed with 0xC0, |  | 
|      * are stored through collationSource->CEpos (which is advanced), and |  | 
|      * offsetRepeatCount is incremented by one for that stored element. |  | 
|      * The return value is r masked with UCOL_PRIMARYMASK, OR-ed with 0x0505. |  | 
|      * NOTE(review): 0xC0 looks like the continuation marker and 0x0505 like |  | 
|      * common secondary/tertiary weights - confirm against the UCOL_* constants. |  | 
|      * No bounds check is made on CEpos here. |  | 
|      */ |  | 
| 2503 static |  | 
| 2504 inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) { |  | 
| 2505     uint32_t r = uprv_uca_getImplicitPrimary(cp); |  | 
| 2506     *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0; |  | 
| 2507     collationSource->offsetRepeatCount += 1; |  | 
| 2508     return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order' |  | 
| 2509 } |  | 
| 2510 |  | 
| 2511 /** |  | 
| 2512 * Inserts the argument character into the front of the buffer replacing the |  | 
| 2513 * front null terminator. |  | 
|      * Concretely: ch overwrites index 0 of data->writableBuffer (expected to |  | 
|      * hold the leading NUL), a new NUL is inserted in front of it, and |  | 
|      * data->pos is set to index 2 of the terminated buffer, i.e. just past |  | 
|      * the character that was inserted. |  | 
| 2514 * @param data collation element iterator data |  | 
| 2515 * @param ch character to be inserted at the front of the buffer |  | 
| 2516 */ |  | 
| 2517 static |  | 
| 2518 inline void insertBufferFront(collIterate *data, UChar ch) |  | 
| 2519 { |  | 
| 2520     data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTer
      minatedBuffer() + 2; |  | 
| 2521 } |  | 
| 2522 |  | 
| 2523 /** |  | 
| 2524 * Special normalization function for contraction in the previous iterator. |  | 
| 2525 * Normalizes the text from just after the fcd position (or from the start of |  | 
| 2526 * the string when fcdPosition is NULL) up to and including data->pos, and |  | 
| 2527 * stores the result in the writable buffer. |  | 
|      * The buffer then holds: a leading NUL, the normalized text, and the text |  | 
|      * saved in endOfBuffer (the character at pos + 1 when iterating over the |  | 
|      * string with a length, otherwise the previous buffer content after its |  | 
|      * leading NUL). |  | 
| 2528 * pos will now point into the buffer, just past the normalized text. |  | 
|      * fcdPosition is only read here, not modified. |  | 
| 2529 * Flags will be changed accordingly: the old flags are saved in origFlags, |  | 
|      * UCOL_ITER_INNORMBUF is set, UCOL_ITER_NORM and UCOL_ITER_HASLEN are cleared. |  | 
| 2530 * @param data collation iterator data |  | 
|      * @param status receives any error from the normalizer; on failure the |  | 
|      *        function returns before pos and flags are updated |  | 
| 2531 */ |  | 
| 2532 static |  | 
| 2533 inline void normalizePrevContraction(collIterate *data, UErrorCode *status) |  | 
| 2534 { |  | 
| 2535     const UChar *pEnd = data->pos + 1;         /* End normalize + 1 */ |  | 
| 2536     const UChar *pStart; |  | 
| 2537 |  | 
| 2538     UnicodeString endOfBuffer; |  | 
| 2539     if (data->flags & UCOL_ITER_HASLEN) { |  | 
| 2540         /* |  | 
| 2541         normalization buffer not used yet, we'll pull down the next |  | 
| 2542         character into the end of the buffer |  | 
| 2543         */ |  | 
| 2544         endOfBuffer.setTo(*pEnd); |  | 
| 2545     } |  | 
| 2546     else { |  | 
| 2547         endOfBuffer.setTo(data->writableBuffer, 1);  // after the leading NUL |  | 
| 2548     } |  | 
| 2549 |  | 
| 2550     if (data->fcdPosition == NULL) { |  | 
| 2551         pStart = data->string; |  | 
| 2552     } |  | 
| 2553     else { |  | 
| 2554         pStart = data->fcdPosition + 1; |  | 
| 2555     } |  | 
| 2556     int32_t normLen = |  | 
| 2557         data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStar
      t)), |  | 
| 2558                              data->writableBuffer, |  | 
| 2559                              *status). |  | 
| 2560         length(); |  | 
| 2561     if(U_FAILURE(*status)) { |  | 
| 2562         return; /* pos and flags are left as they were */ |  | 
| 2563     } |  | 
| 2564     /* |  | 
| 2565     this puts the null termination infront of the normalized string instead |  | 
| 2566     of the end; the saved endOfBuffer text is appended after the normalized |  | 
|      text, and pos (buffer + 1 + normLen) lands on the first saved character |  | 
| 2567     */ |  | 
| 2568     data->pos = |  | 
| 2569         data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminat
      edBuffer() + |  | 
| 2570         1 + normLen; |  | 
| 2571     data->origFlags  = data->flags; |  | 
| 2572     data->flags     |= UCOL_ITER_INNORMBUF; |  | 
| 2573     data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); |  | 
| 2574 } |  | 
| 2575 |  | 
| 2576 /** |  | 
| 2577 * Contraction character management function that returns the previous character |  | 
| 2578 * for the backwards iterator. |  | 
| 2579 * Does nothing if the previous character is in buffer and not the first |  | 
| 2580 * character in it. |  | 
| 2581 * Else it checks previous character in data string to see if it is |  | 
| 2582 * normalizable. |  | 
| 2583 * If it is not, the character is simply copied into the buffer, else |  | 
| 2584 * the whole normalized substring is copied into the buffer, including the |  | 
| 2585 * current character. |  | 
|      * In the path that needs no normalization data->pos is not moved; the |  | 
|      * caller in ucol_prv_getSpecialCE follows this call with goBackOne(). |  | 
| 2586 * @param data collation element iterator data |  | 
|      * @param status only passed through to normalizePrevContraction; it is not |  | 
|      *        read or written directly in this function |  | 
| 2587 * @return previous character |  | 
| 2588 */ |  | 
| 2589 static |  | 
| 2590 inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status) |  | 
| 2591 { |  | 
| 2592     UChar  prevch; |  | 
| 2593     UChar  ch; |  | 
| 2594     const UChar *start; |  | 
| 2595     UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF); /* NOTE(review): assumes the flag bit survives the UBool cast - confirm */ |  | 
| 2596     if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 || |  | 
| 2597         (innormbuf && *(data->pos - 1) != 0)) { |  | 
| 2598         /* |  | 
| 2599         if no normalization. |  | 
| 2600         if previous character is in normalized buffer, no further normalization |  | 
| 2601         is required |  | 
|          (a 0 before pos is the leading NUL that marks the buffer start) |  | 
| 2602         */ |  | 
| 2603       if(data->flags & UCOL_USE_ITERATOR) { |  | 
| 2604         data->iterator->move(data->iterator, -1, UITER_CURRENT); |  | 
| 2605         return (UChar)data->iterator->next(data->iterator); /* next() re-reads the char stepped over, so the iterator ends where it began */ |  | 
| 2606       } else { |  | 
| 2607         return *(data->pos - 1); |  | 
| 2608       } |  | 
| 2609     } |  | 
| 2610 |  | 
| 2611     start = data->pos; |  | 
| 2612     if ((data->fcdPosition==NULL)||(data->flags & UCOL_ITER_HASLEN)) { |  | 
| 2613         /* in data string */ |  | 
| 2614         if ((start - 1) == data->string) { |  | 
| 2615             return *(start - 1); /* first character of the string: nothing precedes it to check */ |  | 
| 2616         } |  | 
| 2617         start --; |  | 
| 2618         ch     = *start; |  | 
| 2619         prevch = *(start - 1); |  | 
| 2620     } |  | 
| 2621     else { |  | 
| 2622         /* |  | 
| 2623         in writable buffer, at this point fcdPosition can not be NULL. |  | 
| 2624         see contracting tag. |  | 
| 2625         */ |  | 
| 2626         if (data->fcdPosition == data->string) { |  | 
| 2627             /* at the start of the string, just dump it into the normalizer */ |  | 
| 2628             insertBufferFront(data, *(data->fcdPosition)); |  | 
| 2629             data->fcdPosition = NULL; |  | 
| 2630             return *(data->pos - 1); |  | 
| 2631         } |  | 
| 2632         start  = data->fcdPosition; |  | 
| 2633         ch     = *start; |  | 
| 2634         prevch = *(start - 1); |  | 
| 2635     } |  | 
| 2636     /* |  | 
| 2637     * if the current character is not fcd. |  | 
| 2638     * Trailing combining class == 0. |  | 
|      * Only characters at or above NFC_ZERO_CC_BLOCK_LIMIT_ trigger the full |  | 
|      * collPrevIterFCD check below. |  | 
| 2639     */ |  | 
| 2640     if (data->fcdPosition > start && |  | 
| 2641        (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ || prevch >= NFC_ZERO_CC_BLOCK_LIMIT_)) |  | 
| 2642     { |  | 
| 2643         /* |  | 
| 2644         Need a more complete FCD check and possible normalization. |  | 
| 2645         normalize substring will be appended to buffer |  | 
| 2646         */ |  | 
| 2647         const UChar *backuppos = data->pos; |  | 
| 2648         data->pos = start; |  | 
| 2649         if (collPrevIterFCD(data)) { |  | 
| 2650             normalizePrevContraction(data, status); |  | 
| 2651             return *(data->pos - 1); |  | 
| 2652         } |  | 
| 2653         data->pos = backuppos; /* FCD check passed: restore the position saved above */ |  | 
| 2654         data->fcdPosition ++; |  | 
| 2655     } |  | 
| 2656 |  | 
| 2657     if (innormbuf) { |  | 
| 2658     /* |  | 
| 2659     no normalization is to be done hence only one character will be |  | 
| 2660     appended to the buffer. |  | 
| 2661     */ |  | 
| 2662         insertBufferFront(data, ch); |  | 
| 2663         data->fcdPosition --; |  | 
| 2664     } |  | 
| 2665 |  | 
| 2666     return ch; |  | 
| 2667 } |  | 
| 2668 |  | 
| 2669 /* This function handles the special CEs like contractions, expansions, surrogat
      es, Thai */ |  | 
| 2670 /* It is called by getNextCE */ |  | 
| 2671 |  | 
| 2672 /* The following should be even: the DIGIT_TAG handling below combines |  | 
|      * decimal digits in pairs into base-100 bytes and sizes its temporary |  | 
|      * buffer as UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3. */ |  | 
| 2673 #define UCOL_MAX_DIGITS_FOR_NUMBER 254 |  | 
| 2674 |  | 
| 2675 uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
      lIterate *source, UErrorCode *status) { |  | 
| 2676     collIterateState entryState; |  | 
| 2677     backupState(source, &entryState); |  | 
| 2678     UChar32 cp = ch; |  | 
| 2679 |  | 
| 2680     for (;;) { |  | 
| 2681         // This loop will repeat only in the case of contractions, and only when
       a contraction |  | 
| 2682         //   is found and the first CE resulting from that contraction is itself
       a special |  | 
| 2683         //   (an expansion, for example.)  All other special CE types are fully 
      handled the |  | 
| 2684         //   first time through, and the loop exits. |  | 
| 2685 |  | 
| 2686         const uint32_t *CEOffset = NULL; |  | 
| 2687         switch(getCETag(CE)) { |  | 
| 2688         case NOT_FOUND_TAG: |  | 
| 2689             /* This one is not found, and we'll let somebody else bother about i
      t... no more games */ |  | 
| 2690             return CE; |  | 
| 2691         case SPEC_PROC_TAG: |  | 
| 2692             { |  | 
| 2693                 // Special processing is getting a CE that is preceded by a cert
      ain prefix |  | 
| 2694                 // Currently this is only needed for optimizing Japanese length 
      and iteration marks. |  | 
| 2695                 // When we encouter a special processing tag, we go backwards an
      d try to see if |  | 
| 2696                 // we have a match. |  | 
| 2697                 // Contraction tables are used - so the whole process is not unl
      ike contraction. |  | 
| 2698                 // prefix data is stored backwards in the table. |  | 
| 2699                 const UChar *UCharOffset; |  | 
| 2700                 UChar schar, tchar; |  | 
| 2701                 collIterateState prefixState; |  | 
| 2702                 backupState(source, &prefixState); |  | 
| 2703                 loadState(source, &entryState, TRUE); |  | 
| 2704                 goBackOne(source); // We want to look at the point where we ente
      red - actually one |  | 
| 2705                 // before that... |  | 
| 2706 |  | 
| 2707                 for(;;) { |  | 
| 2708                     // This loop will run once per source string character, for 
      as long as we |  | 
| 2709                     //  are matching a potential contraction sequence |  | 
| 2710 |  | 
| 2711                     // First we position ourselves at the begining of contractio
      n sequence |  | 
| 2712                     const UChar *ContractionStart = UCharOffset = (UChar *)coll-
      >image+getContractOffset(CE); |  | 
| 2713                     if (collIter_bos(source)) { |  | 
| 2714                         CE = *(coll->contractionCEs + (UCharOffset - coll->contr
      actionIndex)); |  | 
| 2715                         break; |  | 
| 2716                     } |  | 
| 2717                     schar = getPrevNormalizedChar(source, status); |  | 
| 2718                     goBackOne(source); |  | 
| 2719 |  | 
| 2720                     while(schar > (tchar = *UCharOffset)) { /* since the contrac
      tion codepoints should be ordered, we skip all that are smaller */ |  | 
| 2721                         UCharOffset++; |  | 
| 2722                     } |  | 
| 2723 |  | 
| 2724                     if (schar == tchar) { |  | 
| 2725                         // Found the source string char in the table. |  | 
| 2726                         //  Pick up the corresponding CE from the table. |  | 
| 2727                         CE = *(coll->contractionCEs + |  | 
| 2728                             (UCharOffset - coll->contractionIndex)); |  | 
| 2729                     } |  | 
| 2730                     else |  | 
| 2731                     { |  | 
| 2732                         // Source string char was not in the table. |  | 
| 2733                         //   We have not found the prefix. |  | 
| 2734                         CE = *(coll->contractionCEs + |  | 
| 2735                             (ContractionStart - coll->contractionIndex)); |  | 
| 2736                     } |  | 
| 2737 |  | 
| 2738                     if(!isPrefix(CE)) { |  | 
| 2739                         // The source string char was in the contraction table, 
      and the corresponding |  | 
| 2740                         //   CE is not a prefix CE.  We found the prefix, break |  | 
| 2741                         //   out of loop, this CE will end up being returned.  T
      his is the normal |  | 
| 2742                         //   way out of prefix handling when the source actually
       contained |  | 
| 2743                         //   the prefix. |  | 
| 2744                         break; |  | 
| 2745                     } |  | 
| 2746                 } |  | 
| 2747                 if(CE != UCOL_NOT_FOUND) { // we found something and we can meri
      lly continue |  | 
| 2748                     loadState(source, &prefixState, TRUE); |  | 
| 2749                     if(source->origFlags & UCOL_USE_ITERATOR) { |  | 
| 2750                         source->flags = source->origFlags; |  | 
| 2751                     } |  | 
| 2752                 } else { // prefix search was a failure, we have to backup all t
      he way to the start |  | 
| 2753                     loadState(source, &entryState, TRUE); |  | 
| 2754                 } |  | 
| 2755                 break; |  | 
| 2756             } |  | 
| 2757         case CONTRACTION_TAG: |  | 
| 2758             { |  | 
| 2759                 /* This should handle contractions */ |  | 
| 2760                 collIterateState state; |  | 
| 2761                 backupState(source, &state); |  | 
| 2762                 uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->imag
      e+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND; |  | 
| 2763                 const UChar *UCharOffset; |  | 
| 2764                 UChar schar, tchar; |  | 
| 2765 |  | 
| 2766                 for (;;) { |  | 
| 2767                     /* This loop will run once per source string character, for 
      as long as we     */ |  | 
| 2768                     /*  are matching a potential contraction sequence           
             */ |  | 
| 2769 |  | 
| 2770                     /* First we position ourselves at the begining of contractio
      n sequence */ |  | 
| 2771                     const UChar *ContractionStart = UCharOffset = (UChar *)coll-
      >image+getContractOffset(CE); |  | 
| 2772 |  | 
| 2773                     if (collIter_eos(source)) { |  | 
| 2774                         // Ran off the end of the source string. |  | 
| 2775                         CE = *(coll->contractionCEs + (UCharOffset - coll->contr
      actionIndex)); |  | 
| 2776                         // So we'll pick whatever we have at the point... |  | 
| 2777                         if (CE == UCOL_NOT_FOUND) { |  | 
| 2778                             // back up the source over all the chars we scanned 
      going into this contraction. |  | 
| 2779                             CE = firstCE; |  | 
| 2780                             loadState(source, &state, TRUE); |  | 
| 2781                             if(source->origFlags & UCOL_USE_ITERATOR) { |  | 
| 2782                                 source->flags = source->origFlags; |  | 
| 2783                             } |  | 
| 2784                         } |  | 
| 2785                         break; |  | 
| 2786                     } |  | 
| 2787 |  | 
| 2788                     uint8_t maxCC = (uint8_t)(*(UCharOffset)&0xFF); /*get the di
      scontiguos stuff */ /* skip the backward offset, see above */ |  | 
| 2789                     uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8); |  | 
| 2790 |  | 
| 2791                     schar = getNextNormalizedChar(source); |  | 
| 2792                     while(schar > (tchar = *UCharOffset)) { /* since the contrac
      tion codepoints should be ordered, we skip all that are smaller */ |  | 
| 2793                         UCharOffset++; |  | 
| 2794                     } |  | 
| 2795 |  | 
| 2796                     if (schar == tchar) { |  | 
| 2797                         // Found the source string char in the contraction table
      . |  | 
| 2798                         //  Pick up the corresponding CE from the table. |  | 
| 2799                         CE = *(coll->contractionCEs + |  | 
| 2800                             (UCharOffset - coll->contractionIndex)); |  | 
| 2801                     } |  | 
| 2802                     else |  | 
| 2803                     { |  | 
| 2804                         // Source string char was not in contraction table. |  | 
| 2805                         //   Unless we have a discontiguous contraction, we have
       finished |  | 
| 2806                         //   with this contraction. |  | 
| 2807                         // in order to do the proper detection, we |  | 
| 2808                         // need to see if we're dealing with a supplementary |  | 
| 2809                         /* We test whether the next two char are surrogate pairs
      . |  | 
| 2810                         * This test is done if the iterator is not NULL. |  | 
| 2811                         * If there is no surrogate pair, the iterator |  | 
| 2812                         * goes back one if needed. */ |  | 
| 2813                         UChar32 miss = schar; |  | 
| 2814                         if (source->iterator) { |  | 
| 2815                             UChar32 surrNextChar; /* the next char in the iterat
      ion to test */ |  | 
| 2816                             int32_t prevPos; /* holds the previous position befo
      re move forward of the source iterator */ |  | 
| 2817                             if(U16_IS_LEAD(schar) && source->iterator->hasNext(s
      ource->iterator)) { |  | 
| 2818                                 prevPos = source->iterator->index; |  | 
| 2819                                 surrNextChar = getNextNormalizedChar(source); |  | 
| 2820                                 if (U16_IS_TRAIL(surrNextChar)) { |  | 
| 2821                                     miss = U16_GET_SUPPLEMENTARY(schar, surrNext
      Char); |  | 
| 2822                                 } else if (prevPos < source->iterator->index){ |  | 
| 2823                                     goBackOne(source); |  | 
| 2824                                 } |  | 
| 2825                             } |  | 
| 2826                         } else if (U16_IS_LEAD(schar) && source->pos + 1 < sourc
      e->endp) { |  | 
| 2827                             const UChar* prevPos = source->pos; |  | 
| 2828                             UChar nextChar = getNextNormalizedChar(source); |  | 
| 2829                             if (U16_IS_TRAIL(nextChar)) { |  | 
| 2830                                 miss = U16_GET_SUPPLEMENTARY(schar, nextChar); |  | 
| 2831                             } else if (prevPos < source->pos) { |  | 
| 2832                                 goBackOne(source); |  | 
| 2833                             } |  | 
| 2834                         } |  | 
| 2835 |  | 
| 2836                         uint8_t sCC; |  | 
| 2837                         if (miss < 0x300 || |  | 
| 2838                             maxCC == 0 || |  | 
| 2839                             (sCC = i_getCombiningClass(miss, coll)) == 0 || |  | 
| 2840                             sCC>maxCC || |  | 
| 2841                             (allSame != 0 && sCC == maxCC) || |  | 
| 2842                             collIter_eos(source)) |  | 
| 2843                         { |  | 
| 2844                             //  Contraction can not be discontiguous. |  | 
| 2845                             goBackOne(source);  // back up the source string by 
      one, |  | 
| 2846                             //  because  the character we just looked at was |  | 
| 2847                             //  not part of the contraction.   */ |  | 
| 2848                             if(U_IS_SUPPLEMENTARY(miss)) { |  | 
| 2849                                 goBackOne(source); |  | 
| 2850                             } |  | 
| 2851                             CE = *(coll->contractionCEs + |  | 
| 2852                                 (ContractionStart - coll->contractionIndex)); |  | 
| 2853                         } else { |  | 
| 2854                             // |  | 
| 2855                             // Contraction is possibly discontiguous. |  | 
| 2856                             //   Scan more of source string looking for a match |  | 
| 2857                             // |  | 
| 2858                             UChar tempchar; |  | 
| 2859                             /* find the next character if schar is not a base ch
      aracter |  | 
| 2860                             and we are not yet at the end of the string */ |  | 
| 2861                             tempchar = getNextNormalizedChar(source); |  | 
| 2862                             // probably need another supplementary thingie here |  | 
| 2863                             goBackOne(source); |  | 
| 2864                             if (i_getCombiningClass(tempchar, coll) == 0) { |  | 
| 2865                                 goBackOne(source); |  | 
| 2866                                 if(U_IS_SUPPLEMENTARY(miss)) { |  | 
| 2867                                     goBackOne(source); |  | 
| 2868                                 } |  | 
| 2869                                 /* Spit out the last char of the string, wasn't 
      tasty enough */ |  | 
| 2870                                 CE = *(coll->contractionCEs + |  | 
| 2871                                     (ContractionStart - coll->contractionIndex))
      ; |  | 
| 2872                             } else { |  | 
| 2873                                 CE = getDiscontiguous(coll, source, ContractionS
      tart); |  | 
| 2874                             } |  | 
| 2875                         } |  | 
| 2876                     } // else after if(schar == tchar) |  | 
| 2877 |  | 
| 2878                     if(CE == UCOL_NOT_FOUND) { |  | 
| 2879                         /* The Source string did not match the contraction that 
      we were checking.  */ |  | 
| 2880                         /*  Back up the source position to undo the effects of h
      aving partially    */ |  | 
| 2881                         /*   scanned through what ultimately proved to not be a 
      contraction.       */ |  | 
| 2882                         loadState(source, &state, TRUE); |  | 
| 2883                         CE = firstCE; |  | 
| 2884                         break; |  | 
| 2885                     } |  | 
| 2886 |  | 
| 2887                     if(!isContraction(CE)) { |  | 
| 2888                         // The source string char was in the contraction table, 
      and the corresponding |  | 
| 2889                         //   CE is not a contraction CE.  We completed the contr
      action, break |  | 
| 2890                         //   out of loop, this CE will end up being returned.  T
      his is the normal |  | 
| 2891                         //   way out of contraction handling when the source act
      ually contained |  | 
| 2892                         //   the contraction. |  | 
| 2893                         break; |  | 
| 2894                     } |  | 
| 2895 |  | 
| 2896 |  | 
| 2897                     // The source string char was in the contraction table, and 
      the corresponding |  | 
| 2898                     //   CE is IS  a contraction CE.  We will continue looping t
      o check the source |  | 
| 2899                     //   string for the remaining chars in the contraction. |  | 
| 2900                     uint32_t tempCE = *(coll->contractionCEs + (ContractionStart
       - coll->contractionIndex)); |  | 
| 2901                     if(tempCE != UCOL_NOT_FOUND) { |  | 
| 2902                         // We have scanned a a section of source string for whic
      h there is a |  | 
| 2903                         //  CE from the contraction table.  Remember the CE and 
      scan position, so |  | 
| 2904                         //  that we can return to this point if further scanning
       fails to |  | 
| 2905                         //  match a longer contraction sequence. |  | 
| 2906                         firstCE = tempCE; |  | 
| 2907 |  | 
| 2908                         goBackOne(source); |  | 
| 2909                         backupState(source, &state); |  | 
| 2910                         getNextNormalizedChar(source); |  | 
| 2911 |  | 
| 2912                         // Another way to do this is: |  | 
| 2913                         //collIterateState tempState; |  | 
| 2914                         //backupState(source, &tempState); |  | 
| 2915                         //goBackOne(source); |  | 
| 2916                         //backupState(source, &state); |  | 
| 2917                         //loadState(source, &tempState, TRUE); |  | 
| 2918 |  | 
| 2919                         // The problem is that for incomplete contractions we ha
      ve to remember the previous |  | 
| 2920                         // position. Before, the only thing I needed to do was s
      tate.pos--; |  | 
| 2921                         // After iterator introduction and especially after intr
      oduction of normalizing |  | 
| 2922                         // iterators, it became much more difficult to decrease 
      the saved state. |  | 
| 2923                         // I'm not yet sure which of the two methods above is fa
      ster. |  | 
| 2924                     } |  | 
| 2925                 } // for(;;) |  | 
| 2926                 break; |  | 
| 2927             } // case CONTRACTION_TAG: |  | 
| 2928         case LONG_PRIMARY_TAG: |  | 
| 2929             { |  | 
| 2930                 *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER; |  | 
| 2931                 CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYT
      E_COMMON; |  | 
| 2932                 source->offsetRepeatCount += 1; |  | 
| 2933                 return CE; |  | 
| 2934             } |  | 
| 2935         case EXPANSION_TAG: |  | 
| 2936             { |  | 
| 2937                 /* This should handle expansion. */ |  | 
| 2938                 /* NOTE: we can encounter both continuations and expansions in a
      n expansion! */ |  | 
| 2939                 /* I have to decide where continuations are going to be dealt wi
      th */ |  | 
| 2940                 uint32_t size; |  | 
| 2941                 uint32_t i;    /* general counter */ |  | 
| 2942 |  | 
| 2943                 CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* fi
      nd the offset to expansion table */ |  | 
| 2944                 size = getExpansionCount(CE); |  | 
| 2945                 CE = *CEOffset++; |  | 
| 2946               //source->offsetRepeatCount = -1; |  | 
| 2947 |  | 
| 2948                 if(size != 0) { /* if there are less than 16 elements in expansi
      on, we don't terminate */ |  | 
| 2949                     for(i = 1; i<size; i++) { |  | 
| 2950                         *(source->CEpos++) = *CEOffset++; |  | 
| 2951                         source->offsetRepeatCount += 1; |  | 
| 2952                     } |  | 
| 2953                 } else { /* else, we do */ |  | 
| 2954                     while(*CEOffset != 0) { |  | 
| 2955                         *(source->CEpos++) = *CEOffset++; |  | 
| 2956                         source->offsetRepeatCount += 1; |  | 
| 2957                     } |  | 
| 2958                 } |  | 
| 2959 |  | 
| 2960                 return CE; |  | 
| 2961             } |  | 
| 2962         case DIGIT_TAG: |  | 
| 2963             { |  | 
| 2964                 /* |  | 
| 2965                 We do a check to see if we want to collate digits as numbers; if
       so we generate |  | 
| 2966                 a custom collation key. Otherwise we pull out the value stored i
      n the expansion table. |  | 
| 2967                 */ |  | 
| 2968                 //uint32_t size; |  | 
| 2969                 uint32_t i;    /* general counter */ |  | 
| 2970 |  | 
| 2971                 if (source->coll->numericCollation == UCOL_ON){ |  | 
| 2972                     collIterateState digitState = {0,0,0,0,0,0,0,0,0}; |  | 
| 2973                     UChar32 char32 = 0; |  | 
| 2974                     int32_t digVal = 0; |  | 
| 2975 |  | 
| 2976                     uint32_t digIndx = 0; |  | 
| 2977                     uint32_t endIndex = 0; |  | 
| 2978                     uint32_t trailingZeroIndex = 0; |  | 
| 2979 |  | 
| 2980                     uint8_t collateVal = 0; |  | 
| 2981 |  | 
| 2982                     UBool nonZeroValReached = FALSE; |  | 
| 2983 |  | 
| 2984                     uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I j
      ust need a temporary place to store my generated CEs. |  | 
| 2985                     /* |  | 
| 2986                          We parse the source string until we hit a char that's N
      OT a digit. |  | 
| 2987                         Use this u_charDigitValue. This might be slow because we
       have to |  | 
| 2988                         handle surrogates... |  | 
| 2989                     */ |  | 
| 2990             /* |  | 
| 2991                     if (U16_IS_LEAD(ch)){ |  | 
| 2992                       if (!collIter_eos(source)) { |  | 
| 2993                         backupState(source, &digitState); |  | 
| 2994                         UChar trail = getNextNormalizedChar(source); |  | 
| 2995                         if(U16_IS_TRAIL(trail)) { |  | 
| 2996                           char32 = U16_GET_SUPPLEMENTARY(ch, trail); |  | 
| 2997                         } else { |  | 
| 2998                           loadState(source, &digitState, TRUE); |  | 
| 2999                           char32 = ch; |  | 
| 3000                         } |  | 
| 3001                       } else { |  | 
| 3002                         char32 = ch; |  | 
| 3003                       } |  | 
| 3004                     } else { |  | 
| 3005                       char32 = ch; |  | 
| 3006                     } |  | 
| 3007                     digVal = u_charDigitValue(char32); |  | 
| 3008             */ |  | 
| 3009                     digVal = u_charDigitValue(cp); // if we have arrived here, w
      e have |  | 
| 3010                     // already processed possible supplementaries that trigered 
      the digit tag - |  | 
| 3011                     // all supplementaries are marked in the UCA. |  | 
| 3012                     /* |  | 
| 3013                         We  pad a zero in front of the first element anyways. Th
      is takes |  | 
| 3014                         care of the (probably) most common case where people are
       sorting things followed |  | 
| 3015                         by a single digit |  | 
| 3016                     */ |  | 
| 3017                     digIndx++; |  | 
| 3018                     for(;;){ |  | 
| 3019                         // Make sure we have enough space. No longer needed; |  | 
| 3020                         // at this point digIndx now has a max value of UCOL_MAX
      _DIGITS_FOR_NUMBER |  | 
| 3021                         // (it has been pre-incremented) so we just ensure that 
      numTempBuf is big enough |  | 
| 3022                         // (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3). |  | 
| 3023 |  | 
| 3024                         // Skipping over leading zeroes. |  | 
| 3025                         if (digVal != 0) { |  | 
| 3026                             nonZeroValReached = TRUE; |  | 
| 3027                         } |  | 
| 3028                         if (nonZeroValReached) { |  | 
| 3029                             /* |  | 
| 3030                             We parse the digit string into base 100 numbers (thi
      s fits into a byte). |  | 
| 3031                             We only add to the buffer in twos, thus if we are pa
      rsing an odd character, |  | 
| 3032                             that serves as the 'tens' digit while the if we are 
      parsing an even one, that |  | 
| 3033                             is the 'ones' digit. We dumped the parsed base 100 v
      alue (collateVal) into |  | 
| 3034                             a buffer. We multiply each collateVal by 2 (to give 
      us room) and add 5 (to avoid |  | 
| 3035                             overlapping magic CE byte values). The last byte we 
      subtract 1 to ensure it is less |  | 
| 3036                             than all the other bytes. |  | 
| 3037                             */ |  | 
| 3038 |  | 
| 3039                             if (digIndx % 2 == 1){ |  | 
| 3040                                 collateVal += (uint8_t)digVal; |  | 
| 3041 |  | 
| 3042                                 // We don't enter the low-order-digit case unles
      s we've already seen |  | 
| 3043                                 // the high order, or for the first digit, which
       is always non-zero. |  | 
| 3044                                 if (collateVal != 0) |  | 
| 3045                                     trailingZeroIndex = 0; |  | 
| 3046 |  | 
| 3047                                 numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; |  | 
| 3048                                 collateVal = 0; |  | 
| 3049                             } |  | 
| 3050                             else{ |  | 
| 3051                                 // We drop the collation value into the buffer s
      o if we need to do |  | 
| 3052                                 // a "front patch" we don't have to check to see
       if we're hitting the |  | 
| 3053                                 // last element. |  | 
| 3054                                 collateVal = (uint8_t)(digVal * 10); |  | 
| 3055 |  | 
| 3056                                 // Check for trailing zeroes. |  | 
| 3057                                 if (collateVal == 0) |  | 
| 3058                                 { |  | 
| 3059                                     if (!trailingZeroIndex) |  | 
| 3060                                         trailingZeroIndex = (digIndx/2) + 2; |  | 
| 3061                                 } |  | 
| 3062                                 else |  | 
| 3063                                     trailingZeroIndex = 0; |  | 
| 3064 |  | 
| 3065                                 numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6; |  | 
| 3066                             } |  | 
| 3067                             digIndx++; |  | 
| 3068                         } |  | 
| 3069 |  | 
| 3070                         // Get next character. |  | 
| 3071                         if (!collIter_eos(source)){ |  | 
| 3072                             ch = getNextNormalizedChar(source); |  | 
| 3073                             if (U16_IS_LEAD(ch)){ |  | 
| 3074                                 if (!collIter_eos(source)) { |  | 
| 3075                                     backupState(source, &digitState); |  | 
| 3076                                     UChar trail = getNextNormalizedChar(source); |  | 
| 3077                                     if(U16_IS_TRAIL(trail)) { |  | 
| 3078                                         char32 = U16_GET_SUPPLEMENTARY(ch, trail
      ); |  | 
| 3079                                     } else { |  | 
| 3080                                         loadState(source, &digitState, TRUE); |  | 
| 3081                                         char32 = ch; |  | 
| 3082                                     } |  | 
| 3083                                 } |  | 
| 3084                             } else { |  | 
| 3085                                 char32 = ch; |  | 
| 3086                             } |  | 
| 3087 |  | 
| 3088                             if ((digVal = u_charDigitValue(char32)) == -1 || dig
      Indx > UCOL_MAX_DIGITS_FOR_NUMBER){ |  | 
| 3089                                 // Resetting position to point to the next unpro
      cessed char. We |  | 
| 3090                                 // overshot it when doing our test/set for numbe
      rs. |  | 
| 3091                                 if (char32 > 0xFFFF) { // For surrogates. |  | 
| 3092                                     loadState(source, &digitState, TRUE); |  | 
| 3093                                     //goBackOne(source); |  | 
| 3094                                 } |  | 
| 3095                                 goBackOne(source); |  | 
| 3096                                 break; |  | 
| 3097                             } |  | 
| 3098                         } else { |  | 
| 3099                             break; |  | 
| 3100                         } |  | 
| 3101                     } |  | 
| 3102 |  | 
| 3103                     if (nonZeroValReached == FALSE){ |  | 
| 3104                         digIndx = 2; |  | 
| 3105                         numTempBuf[2] = 6; |  | 
| 3106                     } |  | 
| 3107 |  | 
| 3108                     endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx
      /2) + 2) ; |  | 
| 3109                     if (digIndx % 2 != 0){ |  | 
| 3110                         /* |  | 
| 3111                         We missed a value. Since digIndx isn't even, stuck too m
      any values into the buffer (this is what |  | 
| 3112                         we get for padding the first byte with a zero). "Front-p
      atch" now by pushing all nybbles forward. |  | 
| 3113                         Doing it this way ensures that at least 50% of the time 
      (statistically speaking) we'll only be doing a |  | 
| 3114                         single pass and optimizes for strings with single digits
      . I'm just assuming that's the more common case. |  | 
| 3115                         */ |  | 
| 3116 |  | 
| 3117                         for(i = 2; i < endIndex; i++){ |  | 
| 3118                             numTempBuf[i] =     (((((numTempBuf[i] - 6)/2) % 10)
       * 10) + |  | 
| 3119                                 (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6; |  | 
| 3120                         } |  | 
| 3121                         --digIndx; |  | 
| 3122                     } |  | 
| 3123 |  | 
| 3124                     // Subtract one off of the last byte. |  | 
| 3125                     numTempBuf[endIndex-1] -= 1; |  | 
| 3126 |  | 
| 3127                     /* |  | 
| 3128                     We want to skip over the first two slots in the buffer. The 
      first slot |  | 
| 3129                     is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The 
      second slot is for the |  | 
| 3130                     sign/exponent byte: 0x80 + (decimalPos/2) & 7f. |  | 
| 3131                     */ |  | 
| 3132                     numTempBuf[0] = UCOL_CODAN_PLACEHOLDER; |  | 
| 3133                     numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F)); |  | 
| 3134 |  | 
| 3135                     // Now transfer the collation key to our collIterate struct. |  | 
| 3136                     // The total size for our collation key is endIndx bumped up
       to the next largest even value divided by two. |  | 
| 3137                     //size = ((endIndex+1) & ~1)/2; |  | 
| 3138                     CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARY
      ORDERSHIFT) | //Primary weight |  | 
| 3139                         (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Seco
      ndary weight |  | 
| 3140                         UCOL_BYTE_COMMON; // Tertiary weight. |  | 
| 3141                     i = 2; // Reset the index into the buffer. |  | 
| 3142                     while(i < endIndex) |  | 
| 3143                     { |  | 
| 3144                         uint32_t primWeight = numTempBuf[i++] << 8; |  | 
| 3145                         if ( i < endIndex) |  | 
| 3146                             primWeight |= numTempBuf[i++]; |  | 
| 3147                         *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHI
      FT) | UCOL_CONTINUATION_MARKER; |  | 
| 3148                     } |  | 
| 3149 |  | 
| 3150                 } else { |  | 
| 3151                     // no numeric mode, we'll just switch to whatever we stashed
       and continue |  | 
| 3152                     CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /
      * find the offset to expansion table */ |  | 
| 3153                     CE = *CEOffset++; |  | 
| 3154                     break; |  | 
| 3155                 } |  | 
| 3156                 return CE; |  | 
| 3157             } |  | 
| 3158             /* various implicits optimization */ |  | 
| 3159         case IMPLICIT_TAG:        /* everything that is not defined otherwise */ |  | 
| 3160             /* UCA is filled with these. Tailorings are NOT_FOUND */ |  | 
| 3161             return getImplicit(cp, source); |  | 
| 3162         case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
      */ |  | 
| 3163             // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImpl
      icit |  | 
| 3164             return getImplicit(cp, source); |  | 
| 3165         case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/ |  | 
| 3166             { |  | 
| 3167                 static const uint32_t |  | 
| 3168                     SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11
      A7; |  | 
| 3169                 //const uint32_t LCount = 19; |  | 
| 3170                 static const uint32_t VCount = 21; |  | 
| 3171                 static const uint32_t TCount = 28; |  | 
| 3172                 //const uint32_t NCount = VCount * TCount;   // 588 |  | 
| 3173                 //const uint32_t SCount = LCount * NCount;   // 11172 |  | 
| 3174                 uint32_t L = ch - SBase; |  | 
| 3175 |  | 
| 3176                 // divide into pieces |  | 
| 3177 |  | 
| 3178                 uint32_t T = L % TCount; // we do it in this order since some co
      mpilers can do % and / in one operation |  | 
| 3179                 L /= TCount; |  | 
| 3180                 uint32_t V = L % VCount; |  | 
| 3181                 L /= VCount; |  | 
| 3182 |  | 
| 3183                 // offset them |  | 
| 3184 |  | 
| 3185                 L += LBase; |  | 
| 3186                 V += VBase; |  | 
| 3187                 T += TBase; |  | 
| 3188 |  | 
| 3189                 // return the first CE, but first put the rest into the expansio
      n buffer |  | 
| 3190                 if (!source->coll->image->jamoSpecial) { // FAST PATH |  | 
| 3191 |  | 
| 3192                     *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V
      ); |  | 
| 3193                     if (T != TBase) { |  | 
| 3194                         *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mappin
      g, T); |  | 
| 3195                     } |  | 
| 3196 |  | 
| 3197                     return UTRIE_GET32_FROM_LEAD(&coll->mapping, L); |  | 
| 3198 |  | 
| 3199                 } else { // Jamo is Special |  | 
| 3200                     // Since Hanguls pass the FCD check, it is |  | 
| 3201                     // guaranteed that we won't be in |  | 
| 3202                     // the normalization buffer if something like this happens |  | 
| 3203 |  | 
| 3204                     // However, if we are using a uchar iterator and normalizati
      on |  | 
| 3205                     // is ON, the Hangul that lead us here is going to be in tha
      t |  | 
| 3206                     // normalization buffer. Here we want to restore the uchar |  | 
| 3207                     // iterator state and pull out of the normalization buffer |  | 
| 3208                     if(source->iterator != NULL && source->flags & UCOL_ITER_INN
      ORMBUF) { |  | 
| 3209                         source->flags = source->origFlags; // restore the iterat
      or |  | 
| 3210                         source->pos = NULL; |  | 
| 3211                     } |  | 
| 3212 |  | 
| 3213                     // Move Jamos into normalization buffer |  | 
| 3214                     UChar *buffer = source->writableBuffer.getBuffer(4); |  | 
| 3215                     int32_t bufferLength; |  | 
| 3216                     buffer[0] = (UChar)L; |  | 
| 3217                     buffer[1] = (UChar)V; |  | 
| 3218                     if (T != TBase) { |  | 
| 3219                         buffer[2] = (UChar)T; |  | 
| 3220                         bufferLength = 3; |  | 
| 3221                     } else { |  | 
| 3222                         bufferLength = 2; |  | 
| 3223                     } |  | 
| 3224                     source->writableBuffer.releaseBuffer(bufferLength); |  | 
| 3225 |  | 
| 3226                     // Indicate where to continue in main input string after exh
      austing the writableBuffer |  | 
| 3227                     source->fcdPosition       = source->pos; |  | 
| 3228 |  | 
| 3229                     source->pos   = source->writableBuffer.getTerminatedBuffer()
      ; |  | 
| 3230                     source->origFlags   = source->flags; |  | 
| 3231                     source->flags       |= UCOL_ITER_INNORMBUF; |  | 
| 3232                     source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN); |  | 
| 3233 |  | 
| 3234                     return(UCOL_IGNORABLE); |  | 
| 3235                 } |  | 
| 3236             } |  | 
| 3237         case SURROGATE_TAG: |  | 
| 3238             /* we encountered a leading surrogate. We shall get the CE by using 
      the following code unit */ |  | 
| 3239             /* two things can happen here: next code point can be a trailing sur
      rogate - we will use it */ |  | 
| 3240             /* to retrieve the CE, or it is not a trailing surrogate (or the str
      ing is done). In that case */ |  | 
| 3241             /* we treat it like an unassigned code point. */ |  | 
| 3242             { |  | 
| 3243                 UChar trail; |  | 
| 3244                 collIterateState state; |  | 
| 3245                 backupState(source, &state); |  | 
| 3246                 if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNorma
      lizedChar(source))))) { |  | 
| 3247                     // we could have stepped one char forward and it might have
       turned out that it |  | 
| 3248                     // was not a trail surrogate. In that case, we have to backu
      p. |  | 
| 3249                     loadState(source, &state, TRUE); |  | 
| 3250                     return UCOL_NOT_FOUND; |  | 
| 3251                 } else { |  | 
| 3252                     /* TODO: CE contain the data from the previous CE + the mask
      . It should at least be unmasked */ |  | 
| 3253                     CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFF
      FF, trail); |  | 
| 3254                     if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates 
      in this block, but not this one. |  | 
| 3255                         // We need to backup |  | 
| 3256                         loadState(source, &state, TRUE); |  | 
| 3257                         return CE; |  | 
| 3258                     } |  | 
| 3259                     // calculate the supplementary code point value, if surrogat
      e was not tailored |  | 
| 3260                     cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10U
      L)+0xdc00-0x10000)); |  | 
| 3261                 } |  | 
| 3262             } |  | 
| 3263             break; |  | 
| 3264         case LEAD_SURROGATE_TAG:  /* D800-DBFF*/ |  | 
| 3265             UChar nextChar; |  | 
| 3266             if( source->flags & UCOL_USE_ITERATOR) { |  | 
| 3267                 if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source
      ->iterator))) { |  | 
| 3268                     cp = U16_GET_SUPPLEMENTARY(ch, nextChar); |  | 
| 3269                     source->iterator->next(source->iterator); |  | 
| 3270                     return getImplicit(cp, source); |  | 
| 3271                 } |  | 
| 3272             } else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) || (source->po
      s<source->endp)) && |  | 
| 3273                       U_IS_TRAIL((nextChar=*source->pos))) { |  | 
| 3274                 cp = U16_GET_SUPPLEMENTARY(ch, nextChar); |  | 
| 3275                 source->pos++; |  | 
| 3276                 return getImplicit(cp, source); |  | 
| 3277             } |  | 
| 3278             return UCOL_NOT_FOUND; |  | 
| 3279         case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/ |  | 
| 3280             return UCOL_NOT_FOUND; /* broken surrogate sequence */ |  | 
| 3281         case CHARSET_TAG: |  | 
| 3282             /* not yet implemented */ |  | 
| 3283             /* probably after 1.8 */ |  | 
| 3284             return UCOL_NOT_FOUND; |  | 
| 3285         default: |  | 
| 3286             *status = U_INTERNAL_PROGRAM_ERROR; |  | 
| 3287             CE=0; |  | 
| 3288             break; |  | 
| 3289     } |  | 
| 3290     if (CE <= UCOL_NOT_FOUND) break; |  | 
| 3291   } |  | 
| 3292   return CE; |  | 
| 3293 } |  | 
| 3294 |  | 
| 3295 |  | 
| 3296 /*
       * getPrevImplicit: produces the collation elements for a code point that
       * receives an implicit primary weight (now uses Mark's getImplicitPrimary
       * code, via uprv_uca_getImplicitPrimary()).
       *
       * The weight r is split across two CEs:
       *  - (r & UCOL_PRIMARYMASK) | 0x00000505 is stored into collationSource's CE
       *    buffer at CEpos (which is advanced), and toReturn is set to the slot
       *    just past it;
       *  - ((r & 0x0000FFFF) << 16) | 0x000000C0 is returned to the caller.
       * NOTE(review): 0x0505 and 0xC0 look like the common secondary/tertiary
       * bytes and the continuation marker respectively -- confirm against the
       * UCOL_* constants.
       *
       * The function also updates the offset bookkeeping fields of
       * collationSource (offsetRepeatCount, or offsetBuffer / offsetStore /
       * offsetReturn); see the notes in the body.
       *
       * @param cp               code point passed to uprv_uca_getImplicitPrimary()
       * @param collationSource  iterator state; its CE buffer and offset fields
       *                         are modified
       * @return the CE built from the low 16 bits of the implicit primary
       */ |  | 
| 3297 static |  | 
| 3298 inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) { |  | 
| 3299     uint32_t r = uprv_uca_getImplicitPrimary(cp); |  | 
| 3300 |  | 
| 3301     *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505; |  | 
| 3302     collationSource->toReturn = collationSource->CEpos; |  | 
| 3303 |  | 
| 3304     // Offset bookkeeping (uses pos - string). **** doesn't work if using iterator **** |  | 
| 3305     if (collationSource->flags & UCOL_ITER_INNORMBUF) { |  | 
| 3306         collationSource->offsetRepeatCount = 1; /* INNORMBUF set: no offset is
      derived from pos here; only the repeat count is set */ |  | 
| 3307     } else { |  | 
| 3308         int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->
      string); |  | 
| 3309 |  | 
| 3310         UErrorCode errorCode = U_ZERO_ERROR; /* NOTE(review): local status, never
      inspected after the appendOffset() calls below */ |  | 
| 3311         collationSource->appendOffset(firstOffset, errorCode); |  | 
| 3312         collationSource->appendOffset(firstOffset + 1, errorCode); |  | 
| 3313 |  | 
| 3314         collationSource->offsetReturn = collationSource->offsetStore - 1; |  | 
| 3315         *(collationSource->offsetBuffer) = firstOffset; /* first buffer slot is
      overwritten unconditionally */ |  | 
| 3316         if (collationSource->offsetReturn == collationSource->offsetBuffer) { |  | 
| 3317             collationSource->offsetStore = collationSource->offsetBuffer; |  | 
| 3318         } |  | 
| 3319     } |  | 
| 3320 |  | 
| 3321     return ((r & 0x0000FFFF)<<16) | 0x000000C0; |  | 
| 3322 } |  | 
| 3323 |  | 
| 3324 /** |  | 
| 3325  * This function handles the special CEs like contractions, expansions, |  | 
| 3326  * surrogates, Thai. |  | 
| 3327  * It is called by getPrevCE. |  | 
| 3328  */ |  | 
| 3329 uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, |  | 
| 3330                           collIterate *source, |  | 
| 3331                           UErrorCode *status) |  | 
| 3332 { |  | 
| 3333     const uint32_t *CEOffset    = NULL; |  | 
| 3334           UChar    *UCharOffset = NULL; |  | 
| 3335           UChar    schar; |  | 
| 3336     const UChar    *constart    = NULL; |  | 
| 3337           uint32_t size; |  | 
| 3338           UChar    buffer[UCOL_MAX_BUFFER]; |  | 
| 3339           uint32_t *endCEBuffer; |  | 
| 3340           UChar   *strbuffer; |  | 
| 3341           int32_t noChars = 0; |  | 
| 3342           int32_t CECount = 0; |  | 
| 3343 |  | 
| 3344     for(;;) |  | 
| 3345     { |  | 
| 3346         /* the only ces that loops are thai and contractions */ |  | 
| 3347         switch (getCETag(CE)) |  | 
| 3348         { |  | 
| 3349         case NOT_FOUND_TAG:  /* this tag always returns */ |  | 
| 3350             return CE; |  | 
| 3351 |  | 
| 3352         case SPEC_PROC_TAG: |  | 
| 3353             { |  | 
| 3354                 // Special processing is getting a CE that is preceded by a cert
      ain prefix |  | 
| 3355                 // Currently this is only needed for optimizing Japanese length 
      and iteration marks. |  | 
| 3356                 // When we encounter a special processing tag, we go backwards an
      d try to see if |  | 
| 3357                 // we have a match. |  | 
| 3358                 // Contraction tables are used - so the whole process is not unl
      ike contraction. |  | 
| 3359                 // prefix data is stored backwards in the table. |  | 
| 3360                 const UChar *UCharOffset; |  | 
| 3361                 UChar schar, tchar; |  | 
| 3362                 collIterateState prefixState; |  | 
| 3363                 backupState(source, &prefixState); |  | 
| 3364                 for(;;) { |  | 
| 3365                     // This loop will run once per source string character, for 
      as long as we |  | 
| 3366                     //  are matching a potential contraction sequence |  | 
| 3367 |  | 
| 3368                     // First we position ourselves at the beginning of contractio
      n sequence |  | 
| 3369                     const UChar *ContractionStart = UCharOffset = (UChar *)coll-
      >image+getContractOffset(CE); |  | 
| 3370 |  | 
| 3371                     if (collIter_bos(source)) { |  | 
| 3372                         CE = *(coll->contractionCEs + (UCharOffset - coll->contr
      actionIndex)); |  | 
| 3373                         break; |  | 
| 3374                     } |  | 
| 3375                     schar = getPrevNormalizedChar(source, status); |  | 
| 3376                     goBackOne(source); |  | 
| 3377 |  | 
| 3378                     while(schar > (tchar = *UCharOffset)) { /* since the contrac
      tion codepoints should be ordered, we skip all that are smaller */ |  | 
| 3379                         UCharOffset++; |  | 
| 3380                     } |  | 
| 3381 |  | 
| 3382                     if (schar == tchar) { |  | 
| 3383                         // Found the source string char in the table. |  | 
| 3384                         //  Pick up the corresponding CE from the table. |  | 
| 3385                         CE = *(coll->contractionCEs + |  | 
| 3386                             (UCharOffset - coll->contractionIndex)); |  | 
| 3387                     } |  | 
| 3388                     else |  | 
| 3389                     { |  | 
| 3390                         // if there is a completely ignorable code point in the 
      middle of |  | 
| 3391                         // a prefix, we need to act as if it's not there |  | 
| 3392                         // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-
      fdef are set to zero) |  | 
| 3393                         // lone surrogates cannot be set to zero as it would bre
      ak other processing |  | 
| 3394                         uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping
      , schar); |  | 
| 3395                         // it's easy for BMP code points |  | 
| 3396                         if(isZeroCE == 0) { |  | 
| 3397                             continue; |  | 
| 3398                         } else if(U16_IS_SURROGATE(schar)) { |  | 
| 3399                             // for supplementary code points, we have to check t
      he next one |  | 
| 3400                             // situations where we are going to ignore |  | 
| 3401                             // 1. beginning of the string: schar is a lone surro
      gate |  | 
| 3402                             // 2. schar is a lone surrogate |  | 
| 3403                             // 3. schar is a trail surrogate in a valid surrogat
      e sequence |  | 
| 3404                             //    that is explicitly set to zero. |  | 
| 3405                             if (!collIter_bos(source)) { |  | 
| 3406                                 UChar lead; |  | 
| 3407                                 if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD(
      lead = getPrevNormalizedChar(source, status))) { |  | 
| 3408                                     isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapp
      ing, lead); |  | 
| 3409                                     if(isSpecial(isZeroCE) && getCETag(isZeroCE)
       == SURROGATE_TAG) { |  | 
| 3410                                         uint32_t finalCE = UTRIE_GET32_FROM_OFFS
      ET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar); |  | 
| 3411                                         if(finalCE == 0) { |  | 
| 3412                                             // this is a real, assigned complete
      ly ignorable code point |  | 
| 3413                                             goBackOne(source); |  | 
| 3414                                             continue; |  | 
| 3415                                         } |  | 
| 3416                                     } |  | 
| 3417                                 } else { |  | 
| 3418                                     // lone surrogate, treat like unassigned |  | 
| 3419                                     return UCOL_NOT_FOUND; |  | 
| 3420                                 } |  | 
| 3421                             } else { |  | 
| 3422                                 // lone surrogate at the beginning, treat like u
      nassigned |  | 
| 3423                                 return UCOL_NOT_FOUND; |  | 
| 3424                             } |  | 
| 3425                         } |  | 
| 3426                         // Source string char was not in the table. |  | 
| 3427                         //   We have not found the prefix. |  | 
| 3428                         CE = *(coll->contractionCEs + |  | 
| 3429                             (ContractionStart - coll->contractionIndex)); |  | 
| 3430                     } |  | 
| 3431 |  | 
| 3432                     if(!isPrefix(CE)) { |  | 
| 3433                         // The source string char was in the contraction table, 
      and the corresponding |  | 
| 3434                         //   CE is not a prefix CE.  We found the prefix, break |  | 
| 3435                         //   out of loop, this CE will end up being returned.  T
      his is the normal |  | 
| 3436                         //   way out of prefix handling when the source actually
       contained |  | 
| 3437                         //   the prefix. |  | 
| 3438                         break; |  | 
| 3439                     } |  | 
| 3440                 } |  | 
| 3441                 loadState(source, &prefixState, TRUE); |  | 
| 3442                 break; |  | 
| 3443             } |  | 
| 3444 |  | 
| 3445         case CONTRACTION_TAG: { |  | 
| 3446             /* to ensure that the backwards and forwards iteration matches, we |  | 
| 3447             take the current region of most possible match and pass it through |  | 
| 3448             the forward iteration. this will ensure that the obstinate problem o
      f |  | 
| 3449             overlapping contractions will not occur. |  | 
| 3450             */ |  | 
| 3451             schar = peekCodeUnit(source, 0); |  | 
| 3452             constart = (UChar *)coll->image + getContractOffset(CE); |  | 
| 3453             if (isAtStartPrevIterate(source) |  | 
| 3454                 /* commented away contraction end checks after adding the checks |  | 
| 3455                 in getPrevCE  */) { |  | 
| 3456                     /* start of string or this is not the end of any contraction
       */ |  | 
| 3457                     CE = *(coll->contractionCEs + |  | 
| 3458                         (constart - coll->contractionIndex)); |  | 
| 3459                     break; |  | 
| 3460             } |  | 
| 3461             strbuffer = buffer; |  | 
| 3462             UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1); |  | 
| 3463             *(UCharOffset --) = 0; |  | 
| 3464             noChars = 0; |  | 
| 3465             // have to swap thai characters |  | 
| 3466             while (ucol_unsafeCP(schar, coll)) { |  | 
| 3467                 *(UCharOffset) = schar; |  | 
| 3468                 noChars++; |  | 
| 3469                 UCharOffset --; |  | 
| 3470                 schar = getPrevNormalizedChar(source, status); |  | 
| 3471                 goBackOne(source); |  | 
| 3472                 // TODO: when we exhaust the contraction buffer, |  | 
| 3473                 // it needs to get reallocated. The problem is |  | 
| 3474                 // that the size depends on the string which is |  | 
| 3475                 // not iterated over. However, since we're travelling |  | 
| 3476                 // backwards, we already had to set the iterator at |  | 
| 3477                 // the end - so we might as well know where we are? |  | 
| 3478                 if (UCharOffset + 1 == buffer) { |  | 
| 3479                     /* we have exhausted the buffer */ |  | 
| 3480                     int32_t newsize = 0; |  | 
| 3481                     if(source->pos) { // actually dealing with a position |  | 
| 3482                         newsize = (int32_t)(source->pos - source->string + 1); |  | 
| 3483                     } else { // iterator |  | 
| 3484                         newsize = 4 * UCOL_MAX_BUFFER; |  | 
| 3485                     } |  | 
| 3486                     strbuffer = (UChar *)uprv_malloc(sizeof(UChar) * |  | 
| 3487                         (newsize + UCOL_MAX_BUFFER)); |  | 
| 3488                     /* test for NULL */ |  | 
| 3489                     if (strbuffer == NULL) { |  | 
| 3490                         *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 3491                         return UCOL_NO_MORE_CES; |  | 
| 3492                     } |  | 
| 3493                     UCharOffset = strbuffer + newsize; |  | 
| 3494                     uprv_memcpy(UCharOffset, buffer, |  | 
| 3495                         UCOL_MAX_BUFFER * sizeof(UChar)); |  | 
| 3496                     UCharOffset --; |  | 
| 3497                 } |  | 
| 3498                 if ((source->pos && (source->pos == source->string || |  | 
| 3499                     ((source->flags & UCOL_ITER_INNORMBUF) && |  | 
| 3500                     *(source->pos - 1) == 0 && source->fcdPosition == NULL))) |  | 
| 3501                     || (source->iterator && !source->iterator->hasPrevious(sourc
      e->iterator))) { |  | 
| 3502                         break; |  | 
| 3503                 } |  | 
| 3504             } |  | 
| 3505             /* adds the initial base character to the string */ |  | 
| 3506             *(UCharOffset) = schar; |  | 
| 3507             noChars++; |  | 
| 3508 |  | 
| 3509             int32_t offsetBias; |  | 
| 3510 |  | 
| 3511             // **** doesn't work if using iterator **** |  | 
| 3512             if (source->flags & UCOL_ITER_INNORMBUF) { |  | 
| 3513                 offsetBias = -1; |  | 
| 3514             } else { |  | 
| 3515                 offsetBias = (int32_t)(source->pos - source->string); |  | 
| 3516             } |  | 
| 3517 |  | 
| 3518             /* a new collIterate is used to simplify things, since using the cur
      rent |  | 
| 3519             collIterate will mean that the forward and backwards iteration will |  | 
| 3520             share and change the same buffers. we don't want to get into that. *
      / |  | 
| 3521             collIterate temp; |  | 
| 3522             int32_t rawOffset; |  | 
| 3523 |  | 
| 3524             IInit_collIterate(coll, UCharOffset, noChars, &temp, status); |  | 
| 3525             if(U_FAILURE(*status)) { |  | 
| 3526                 return (uint32_t)UCOL_NULLORDER; |  | 
| 3527             } |  | 
| 3528             temp.flags &= ~UCOL_ITER_NORM; |  | 
| 3529             temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT; |  | 
| 3530 |  | 
| 3531             rawOffset = (int32_t)(temp.pos - temp.string); // should always be z
      ero? |  | 
| 3532             CE = ucol_IGetNextCE(coll, &temp, status); |  | 
| 3533 |  | 
| 3534             if (source->extendCEs) { |  | 
| 3535                 endCEBuffer = source->extendCEs + source->extendCEsSize; |  | 
| 3536                 CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(u
      int32_t)); |  | 
| 3537             } else { |  | 
| 3538                 endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE; |  | 
| 3539                 CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_
      t)); |  | 
| 3540             } |  | 
| 3541 |  | 
| 3542             while (CE != UCOL_NO_MORE_CES) { |  | 
| 3543                 *(source->CEpos ++) = CE; |  | 
| 3544 |  | 
| 3545                 if (offsetBias >= 0) { |  | 
| 3546                     source->appendOffset(rawOffset + offsetBias, *status); |  | 
| 3547                 } |  | 
| 3548 |  | 
| 3549                 CECount++; |  | 
| 3550                 if (source->CEpos == endCEBuffer) { |  | 
| 3551                     /* ran out of CE space, reallocate to new buffer. |  | 
| 3552                     If reallocation fails, reset pointers and bail out, |  | 
| 3553                     there's no guarantee of the right character position after |  | 
| 3554                     this bail*/ |  | 
| 3555                     if (!increaseCEsCapacity(source)) { |  | 
| 3556                         *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 3557                         break; |  | 
| 3558                     } |  | 
| 3559 |  | 
| 3560                     endCEBuffer = source->extendCEs + source->extendCEsSize; |  | 
| 3561                 } |  | 
| 3562 |  | 
| 3563                 if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) { |  | 
| 3564                     rawOffset = (int32_t)(temp.fcdPosition - temp.string); |  | 
| 3565                 } else { |  | 
| 3566                     rawOffset = (int32_t)(temp.pos - temp.string); |  | 
| 3567                 } |  | 
| 3568 |  | 
| 3569                 CE = ucol_IGetNextCE(coll, &temp, status); |  | 
| 3570             } |  | 
| 3571 |  | 
| 3572             if (strbuffer != buffer) { |  | 
| 3573                 uprv_free(strbuffer); |  | 
| 3574             } |  | 
| 3575             if (U_FAILURE(*status)) { |  | 
| 3576                 return (uint32_t)UCOL_NULLORDER; |  | 
| 3577             } |  | 
| 3578 |  | 
| 3579             if (source->offsetRepeatValue != 0) { |  | 
| 3580                 if (CECount > noChars) { |  | 
| 3581                     source->offsetRepeatCount += temp.offsetRepeatCount; |  | 
| 3582                 } else { |  | 
| 3583                     // **** does this really skip the right offsets? **** |  | 
| 3584                     source->offsetReturn -= (noChars - CECount); |  | 
| 3585                 } |  | 
| 3586             } |  | 
| 3587 |  | 
| 3588             if (offsetBias >= 0) { |  | 
| 3589                 source->offsetReturn = source->offsetStore - 1; |  | 
| 3590                 if (source->offsetReturn == source->offsetBuffer) { |  | 
| 3591                     source->offsetStore = source->offsetBuffer; |  | 
| 3592                 } |  | 
| 3593             } |  | 
| 3594 |  | 
| 3595             source->toReturn = source->CEpos - 1; |  | 
| 3596             if (source->toReturn == source->CEs) { |  | 
| 3597                 source->CEpos = source->CEs; |  | 
| 3598             } |  | 
| 3599 |  | 
| 3600             return *(source->toReturn); |  | 
| 3601         } |  | 
| 3602         case LONG_PRIMARY_TAG: |  | 
| 3603             { |  | 
| 3604                 *(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON 
      << 8) | UCOL_BYTE_COMMON; |  | 
| 3605                 *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER; |  | 
| 3606                 source->toReturn = source->CEpos - 1; |  | 
| 3607 |  | 
| 3608                 if (source->flags & UCOL_ITER_INNORMBUF) { |  | 
| 3609                     source->offsetRepeatCount = 1; |  | 
| 3610                 } else { |  | 
| 3611                     int32_t firstOffset = (int32_t)(source->pos - source->string
      ); |  | 
| 3612 |  | 
| 3613                     source->appendOffset(firstOffset, *status); |  | 
| 3614                     source->appendOffset(firstOffset + 1, *status); |  | 
| 3615 |  | 
| 3616                     source->offsetReturn = source->offsetStore - 1; |  | 
| 3617                     *(source->offsetBuffer) = firstOffset; |  | 
| 3618                     if (source->offsetReturn == source->offsetBuffer) { |  | 
| 3619                         source->offsetStore = source->offsetBuffer; |  | 
| 3620                     } |  | 
| 3621                 } |  | 
| 3622 |  | 
| 3623 |  | 
| 3624                 return *(source->toReturn); |  | 
| 3625             } |  | 
| 3626 |  | 
| 3627         case EXPANSION_TAG: /* this tag always returns */ |  | 
| 3628             { |  | 
| 3629             /* |  | 
| 3630             This should handle expansion. |  | 
| 3631             NOTE: we can encounter both continuations and expansions in an expan
      sion! |  | 
| 3632             I have to decide where continuations are going to be dealt with |  | 
| 3633             */ |  | 
| 3634             int32_t firstOffset = (int32_t)(source->pos - source->string); |  | 
| 3635 |  | 
| 3636             // **** doesn't work if using iterator **** |  | 
| 3637             if (source->offsetReturn != NULL) { |  | 
| 3638                 if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetRet
      urn == source->offsetBuffer) { |  | 
| 3639                     source->offsetStore = source->offsetBuffer; |  | 
| 3640                 }else { |  | 
| 3641                   firstOffset = -1; |  | 
| 3642                 } |  | 
| 3643             } |  | 
| 3644 |  | 
| 3645             /* find the offset to expansion table */ |  | 
| 3646             CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE); |  | 
| 3647             size     = getExpansionCount(CE); |  | 
| 3648             if (size != 0) { |  | 
| 3649                 /* |  | 
| 3650                 if there are less than 16 elements in expansion, we don't termin
      ate |  | 
| 3651                 */ |  | 
| 3652                 uint32_t count; |  | 
| 3653 |  | 
| 3654                 for (count = 0; count < size; count++) { |  | 
| 3655                     *(source->CEpos ++) = *CEOffset++; |  | 
| 3656 |  | 
| 3657                     if (firstOffset >= 0) { |  | 
| 3658                         source->appendOffset(firstOffset + 1, *status); |  | 
| 3659                     } |  | 
| 3660                 } |  | 
| 3661             } else { |  | 
| 3662                 /* else, we do */ |  | 
| 3663                 while (*CEOffset != 0) { |  | 
| 3664                     *(source->CEpos ++) = *CEOffset ++; |  | 
| 3665 |  | 
| 3666                     if (firstOffset >= 0) { |  | 
| 3667                         source->appendOffset(firstOffset + 1, *status); |  | 
| 3668                     } |  | 
| 3669                 } |  | 
| 3670             } |  | 
| 3671 |  | 
| 3672             if (firstOffset >= 0) { |  | 
| 3673                 source->offsetReturn = source->offsetStore - 1; |  | 
| 3674                 *(source->offsetBuffer) = firstOffset; |  | 
| 3675                 if (source->offsetReturn == source->offsetBuffer) { |  | 
| 3676                     source->offsetStore = source->offsetBuffer; |  | 
| 3677                 } |  | 
| 3678             } else { |  | 
| 3679                 source->offsetRepeatCount += size - 1; |  | 
| 3680             } |  | 
| 3681 |  | 
| 3682             source->toReturn = source->CEpos - 1; |  | 
| 3683             // in case of one element expansion, we |  | 
| 3684             // want to immediately return CEpos |  | 
| 3685             if(source->toReturn == source->CEs) { |  | 
| 3686                 source->CEpos = source->CEs; |  | 
| 3687             } |  | 
| 3688 |  | 
| 3689             return *(source->toReturn); |  | 
| 3690             } |  | 
| 3691 |  | 
| 3692         case DIGIT_TAG: |  | 
| 3693             { |  | 
| 3694                 /* |  | 
| 3695                 We do a check to see if we want to collate digits as numbers; if
       so we generate |  | 
| 3696                 a custom collation key. Otherwise we pull out the value stored i
      n the expansion table. |  | 
| 3697                 */ |  | 
| 3698                 uint32_t i;    /* general counter */ |  | 
| 3699 |  | 
| 3700                 if (source->coll->numericCollation == UCOL_ON){ |  | 
| 3701                     uint32_t digIndx = 0; |  | 
| 3702                     uint32_t endIndex = 0; |  | 
| 3703                     uint32_t leadingZeroIndex = 0; |  | 
| 3704                     uint32_t trailingZeroCount = 0; |  | 
| 3705 |  | 
| 3706                     uint8_t collateVal = 0; |  | 
| 3707 |  | 
| 3708                     UBool nonZeroValReached = FALSE; |  | 
| 3709 |  | 
| 3710                     uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I j
      ust need a temporary place to store my generated CEs. |  | 
| 3711                     /* |  | 
| 3712                     We parse the source string until we hit a char that's NOT a 
      digit. |  | 
| 3713                     Use this u_charDigitValue. This might be slow because we hav
      e to |  | 
| 3714                     handle surrogates... |  | 
| 3715                     */ |  | 
| 3716                     /* |  | 
| 3717                     We need to break up the digit string into collection element
      s of UCOL_MAX_DIGITS_FOR_NUMBER or less, |  | 
| 3718                     with any chunks smaller than that being on the right end of 
      the digit string - i.e. the first collation |  | 
| 3719                     element we process when going backward. To determine how lon
      g that chunk might be, we may need to make |  | 
| 3720                     two passes through the loop that collects digits - one to se
      e how long the string is (and how much is |  | 
| 3721                     leading zeros) to determine the length of that right-hand ch
      unk, and a second (if the whole string has |  | 
| 3722                     more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits
      ) to actually process that collation |  | 
| 3723                     element chunk after resetting the state to the initialState 
      at the right side of the digit string. |  | 
| 3724                     */ |  | 
| 3725                     uint32_t ceLimit = 0; |  | 
| 3726                     UChar initial_ch = ch; |  | 
| 3727                     collIterateState initialState = {0,0,0,0,0,0,0,0,0}; |  | 
| 3728                     backupState(source, &initialState); |  | 
| 3729 |  | 
| 3730                     for(;;) { |  | 
| 3731                         collIterateState state = {0,0,0,0,0,0,0,0,0}; |  | 
| 3732                         UChar32 char32 = 0; |  | 
| 3733                         int32_t digVal = 0; |  | 
| 3734 |  | 
| 3735                         if (U16_IS_TRAIL (ch)) { |  | 
| 3736                             if (!collIter_bos(source)){ |  | 
| 3737                                 UChar lead = getPrevNormalizedChar(source, statu
      s); |  | 
| 3738                                 if(U16_IS_LEAD(lead)) { |  | 
| 3739                                     char32 = U16_GET_SUPPLEMENTARY(lead,ch); |  | 
| 3740                                     goBackOne(source); |  | 
| 3741                                 } else { |  | 
| 3742                                     char32 = ch; |  | 
| 3743                                 } |  | 
| 3744                             } else { |  | 
| 3745                                 char32 = ch; |  | 
| 3746                             } |  | 
| 3747                         } else { |  | 
| 3748                             char32 = ch; |  | 
| 3749                         } |  | 
| 3750                         digVal = u_charDigitValue(char32); |  | 
| 3751 |  | 
| 3752                         for(;;) { |  | 
| 3753                             // Make sure we have enough space. No longer needed; |  | 
| 3754                             // at this point the largest value of digIndx when w
      e need to save data in numTempBuf |  | 
| 3755                             // is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post-
      incremented) so we just ensure |  | 
| 3756                             // that numTempBuf is big enough (UCOL_MAX_DIGITS_FO
      R_NUMBER/2 + 2). |  | 
| 3757 |  | 
| 3758                             // Skip over trailing zeroes, and keep a count of th
      em. |  | 
| 3759                             if (digVal != 0) |  | 
| 3760                                 nonZeroValReached = TRUE; |  | 
| 3761 |  | 
| 3762                             if (nonZeroValReached) { |  | 
| 3763                                 /* |  | 
| 3764                                 We parse the digit string into base 100 numbers 
      (this fits into a byte). |  | 
| 3765                                 We only add to the buffer in twos, thus if we ar
      e parsing an odd character, |  | 
| 3766                                 that serves as the 'tens' digit while the if we 
      are parsing an even one, that |  | 
| 3767                                 is the 'ones' digit. We dumped the parsed base 1
      00 value (collateVal) into |  | 
| 3768                                 a buffer. We multiply each collateVal by 2 (to g
      ive us room) and add 5 (to avoid |  | 
| 3769                                 overlapping magic CE byte values). The last byte
       we subtract 1 to ensure it is less |  | 
| 3770                                 than all the other bytes. |  | 
| 3771 |  | 
| 3772                                 Since we're doing in this reverse we want to put
       the first digit encountered into the |  | 
| 3773                                 ones place and the second digit encountered into
       the tens place. |  | 
| 3774                                 */ |  | 
| 3775 |  | 
| 3776                                 if ((digIndx + trailingZeroCount) % 2 == 1) { |  | 
| 3777                                     // High-order digit case (tens place) |  | 
| 3778                                     collateVal += (uint8_t)(digVal * 10); |  | 
| 3779 |  | 
| 3780                                     // We cannot set leadingZeroIndex unless it 
      has been set for the |  | 
| 3781                                     // low-order digit. Therefore, all we can do
       for the high-order |  | 
| 3782                                     // digit is turn it off, never on. |  | 
| 3783                                     // The only time we will have a high digit w
      ithout a low is for |  | 
| 3784                                     // the very first non-zero digit, so no zero
       check is necessary. |  | 
| 3785                                     if (collateVal != 0) |  | 
| 3786                                         leadingZeroIndex = 0; |  | 
| 3787 |  | 
| 3788                                     // The first pass through, digIndx may excee
      d the limit, but in that case |  | 
| 3789                                     // we no longer care about numTempBuf conten
      ts since they will be discarded |  | 
| 3790                                     if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) 
      { |  | 
| 3791                                         numTempBuf[(digIndx/2) + 2] = collateVal
      *2 + 6; |  | 
| 3792                                     } |  | 
| 3793                                     collateVal = 0; |  | 
| 3794                                 } else { |  | 
| 3795                                     // Low-order digit case (ones place) |  | 
| 3796                                     collateVal = (uint8_t)digVal; |  | 
| 3797 |  | 
| 3798                                     // Check for leading zeroes. |  | 
| 3799                                     if (collateVal == 0) { |  | 
| 3800                                         if (!leadingZeroIndex) |  | 
| 3801                                             leadingZeroIndex = (digIndx/2) + 2; |  | 
| 3802                                     } else |  | 
| 3803                                         leadingZeroIndex = 0; |  | 
| 3804 |  | 
| 3805                                     // No need to write to buffer; the case of a
       last odd digit |  | 
| 3806                                     // is handled below. |  | 
| 3807                                 } |  | 
| 3808                                 ++digIndx; |  | 
| 3809                             } else |  | 
| 3810                                 ++trailingZeroCount; |  | 
| 3811 |  | 
| 3812                             if (!collIter_bos(source)) { |  | 
| 3813                                 ch = getPrevNormalizedChar(source, status); |  | 
| 3814                                 //goBackOne(source); |  | 
| 3815                                 if (U16_IS_TRAIL(ch)) { |  | 
| 3816                                     backupState(source, &state); |  | 
| 3817                                     if (!collIter_bos(source)) { |  | 
| 3818                                         goBackOne(source); |  | 
| 3819                                         UChar lead = getPrevNormalizedChar(sourc
      e, status); |  | 
| 3820 |  | 
| 3821                                         if(U16_IS_LEAD(lead)) { |  | 
| 3822                                             char32 = U16_GET_SUPPLEMENTARY(lead,
      ch); |  | 
| 3823                                         } else { |  | 
| 3824                                             loadState(source, &state, FALSE); |  | 
| 3825                                             char32 = ch; |  | 
| 3826                                         } |  | 
| 3827                                     } |  | 
| 3828                                 } else |  | 
| 3829                                     char32 = ch; |  | 
| 3830 |  | 
| 3831                                 if ((digVal = u_charDigitValue(char32)) == -1 ||
       (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) { |  | 
| 3832                                     if (char32 > 0xFFFF) {// For surrogates. |  | 
| 3833                                         loadState(source, &state, FALSE); |  | 
| 3834                                     } |  | 
| 3835                                     // Don't need to "reverse" the goBackOne cal
      l, |  | 
| 3836                                     // as this points to the next position to pr
      ocess.. |  | 
| 3837                                     //if (char32 > 0xFFFF) // For surrogates. |  | 
| 3838                                     //getNextNormalizedChar(source); |  | 
| 3839                                     break; |  | 
| 3840                                 } |  | 
| 3841 |  | 
| 3842                                 goBackOne(source); |  | 
| 3843                             }else |  | 
| 3844                                 break; |  | 
| 3845                         } |  | 
| 3846 |  | 
| 3847                         if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_N
      UMBER) { |  | 
| 3848                             // our collation element is not too big, go ahead an
      d finish with it |  | 
| 3849                             break; |  | 
| 3850                         } |  | 
| 3851                         // our digit string is too long for a collation element; |  | 
| 3852                         // set the limit for it, reset the state and begin again |  | 
| 3853                         ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGIT
      S_FOR_NUMBER; |  | 
| 3854                         if ( ceLimit == 0 ) { |  | 
| 3855                             ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER; |  | 
| 3856                         } |  | 
| 3857                         ch = initial_ch; |  | 
| 3858                         loadState(source, &initialState, FALSE); |  | 
| 3859                         digIndx = endIndex = leadingZeroIndex = trailingZeroCoun
      t = 0; |  | 
| 3860                         collateVal = 0; |  | 
| 3861                         nonZeroValReached = FALSE; |  | 
| 3862                     } |  | 
| 3863 |  | 
| 3864                     if (! nonZeroValReached) { |  | 
| 3865                         digIndx = 2; |  | 
| 3866                         trailingZeroCount = 0; |  | 
| 3867                         numTempBuf[2] = 6; |  | 
| 3868                     } |  | 
| 3869 |  | 
| 3870                     if ((digIndx + trailingZeroCount) % 2 != 0) { |  | 
| 3871                         numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6; |  | 
| 3872                         digIndx += 1;       // The implicit leading zero |  | 
| 3873                     } |  | 
| 3874                     if (trailingZeroCount % 2 != 0) { |  | 
| 3875                         // We had to consume one trailing zero for the low digit |  | 
| 3876                         // of the least significant byte |  | 
| 3877                         digIndx += 1;       // The trailing zero not in the expo
      nent |  | 
| 3878                         trailingZeroCount -= 1; |  | 
| 3879                     } |  | 
| 3880 |  | 
| 3881                     endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2
      ) + 2) ; |  | 
| 3882 |  | 
| 3883                     // Subtract one off of the last byte. Really the first byte 
      here, but it's reversed... |  | 
| 3884                     numTempBuf[2] -= 1; |  | 
| 3885 |  | 
| 3886                     /* |  | 
| 3887                     We want to skip over the first two slots in the buffer. The 
      first slot |  | 
| 3888                     is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The 
      second slot is for the |  | 
| 3889                     sign/exponent byte: 0x80 + (decimalPos/2) & 7f. |  | 
| 3890                     The exponent must be adjusted by the number of leading zeroe
      s, and the number of |  | 
| 3891                     trailing zeroes. |  | 
| 3892                     */ |  | 
| 3893                     numTempBuf[0] = UCOL_CODAN_PLACEHOLDER; |  | 
| 3894                     uint32_t exponent = (digIndx+trailingZeroCount)/2; |  | 
| 3895                     if (leadingZeroIndex) |  | 
| 3896                         exponent -= ((digIndx/2) + 2 - leadingZeroIndex); |  | 
| 3897                     numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F)); |  | 
| 3898 |  | 
| 3899                     // Now transfer the collation key to our collIterate struct. |  | 
| 3900                     // The total size for our collation key is half of endIndex,
       rounded up. |  | 
| 3901                     int32_t size = (endIndex+1)/2; |  | 
| 3902                     if(!ensureCEsCapacity(source, size)) { |  | 
| 3903                         return (uint32_t)UCOL_NULLORDER; |  | 
| 3904                     } |  | 
| 3905                     *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1])
       << UCOL_PRIMARYORDERSHIFT) | //Primary weight |  | 
| 3906                         (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Seco
      ndary weight |  | 
| 3907                         UCOL_BYTE_COMMON; // Tertiary weight. |  | 
| 3908                     i = endIndex - 1; // Reset the index into the buffer. |  | 
| 3909                     while(i >= 2) { |  | 
| 3910                         uint32_t primWeight = numTempBuf[i--] << 8; |  | 
| 3911                         if ( i >= 2) |  | 
| 3912                             primWeight |= numTempBuf[i--]; |  | 
| 3913                         *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHI
      FT) | UCOL_CONTINUATION_MARKER; |  | 
| 3914                     } |  | 
| 3915 |  | 
| 3916                     source->toReturn = source->CEpos -1; |  | 
| 3917                     return *(source->toReturn); |  | 
| 3918                 } else { |  | 
| 3919                     CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE); |  | 
| 3920                     CE = *(CEOffset++); |  | 
| 3921                     break; |  | 
| 3922                 } |  | 
| 3923             } |  | 
| 3924 |  | 
| 3925         case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/ |  | 
| 3926             { |  | 
| 3927                 static const uint32_t |  | 
| 3928                     SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11
      A7; |  | 
| 3929                 //const uint32_t LCount = 19; |  | 
| 3930                 static const uint32_t VCount = 21; |  | 
| 3931                 static const uint32_t TCount = 28; |  | 
| 3932                 //const uint32_t NCount = VCount * TCount;   /* 588 */ |  | 
| 3933                 //const uint32_t SCount = LCount * NCount;   /* 11172 */ |  | 
| 3934 |  | 
| 3935                 uint32_t L = ch - SBase; |  | 
| 3936                 /* |  | 
| 3937                 divide into pieces. |  | 
| 3938                 we do it in this order since some compilers can do % and / in on
      e |  | 
| 3939                 operation |  | 
| 3940                 */ |  | 
| 3941                 uint32_t T = L % TCount; |  | 
| 3942                 L /= TCount; |  | 
| 3943                 uint32_t V = L % VCount; |  | 
| 3944                 L /= VCount; |  | 
| 3945 |  | 
| 3946                 /* offset them */ |  | 
| 3947                 L += LBase; |  | 
| 3948                 V += VBase; |  | 
| 3949                 T += TBase; |  | 
| 3950 |  | 
| 3951                 int32_t firstOffset = (int32_t)(source->pos - source->string); |  | 
| 3952                 source->appendOffset(firstOffset, *status); |  | 
| 3953 |  | 
| 3954                 /* |  | 
| 3955                  * return the first CE, but first put the rest into the expansio
      n buffer |  | 
| 3956                  */ |  | 
| 3957                 if (!source->coll->image->jamoSpecial) { |  | 
| 3958                     *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L
      ); |  | 
| 3959                     *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V
      ); |  | 
| 3960                     source->appendOffset(firstOffset + 1, *status); |  | 
| 3961 |  | 
| 3962                     if (T != TBase) { |  | 
| 3963                         *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mappin
      g, T); |  | 
| 3964                         source->appendOffset(firstOffset + 1, *status); |  | 
| 3965                     } |  | 
| 3966 |  | 
| 3967                     source->toReturn = source->CEpos - 1; |  | 
| 3968 |  | 
| 3969                     source->offsetReturn = source->offsetStore - 1; |  | 
| 3970                     if (source->offsetReturn == source->offsetBuffer) { |  | 
| 3971                         source->offsetStore = source->offsetBuffer; |  | 
| 3972                     } |  | 
| 3973 |  | 
| 3974                     return *(source->toReturn); |  | 
| 3975                 } else { |  | 
| 3976                     // Since Hanguls pass the FCD check, it is |  | 
| 3977                     // guaranteed that we won't be in |  | 
| 3978                     // the normalization buffer if something like this happens |  | 
| 3979 |  | 
| 3980                     // Move Jamos into normalization buffer |  | 
| 3981                     UChar *tempbuffer = source->writableBuffer.getBuffer(5); |  | 
| 3982                     int32_t tempbufferLength, jamoOffset; |  | 
| 3983                     tempbuffer[0] = 0; |  | 
| 3984                     tempbuffer[1] = (UChar)L; |  | 
| 3985                     tempbuffer[2] = (UChar)V; |  | 
| 3986                     if (T != TBase) { |  | 
| 3987                         tempbuffer[3] = (UChar)T; |  | 
| 3988                         tempbufferLength = 4; |  | 
| 3989                     } else { |  | 
| 3990                         tempbufferLength = 3; |  | 
| 3991                     } |  | 
| 3992                     source->writableBuffer.releaseBuffer(tempbufferLength); |  | 
| 3993 |  | 
| 3994                     // Indicate where to continue in main input string after exh
      austing the writableBuffer |  | 
| 3995                     if (source->pos  == source->string) { |  | 
| 3996                         jamoOffset = 0; |  | 
| 3997                         source->fcdPosition = NULL; |  | 
| 3998                     } else { |  | 
| 3999                         jamoOffset = source->pos - source->string; |  | 
| 4000                         source->fcdPosition       = source->pos-1; |  | 
| 4001                     } |  | 
| 4002 |  | 
| 4003                     // Append offsets for the additional chars |  | 
| 4004                     // (not the 0, and not the L whose offsets match the origina
      l Hangul) |  | 
| 4005                     int32_t jamoRemaining = tempbufferLength - 2; |  | 
| 4006                     jamoOffset++; // appended offsets should match end of origin
      al Hangul |  | 
| 4007                     while (jamoRemaining-- > 0) { |  | 
| 4008                         source->appendOffset(jamoOffset, *status); |  | 
| 4009                     } |  | 
| 4010 |  | 
| 4011                     source->offsetRepeatValue = jamoOffset; |  | 
| 4012 |  | 
| 4013                     source->offsetReturn = source->offsetStore - 1; |  | 
| 4014                     if (source->offsetReturn == source->offsetBuffer) { |  | 
| 4015                         source->offsetStore = source->offsetBuffer; |  | 
| 4016                     } |  | 
| 4017 |  | 
| 4018                     source->pos               = source->writableBuffer.getTermin
      atedBuffer() + tempbufferLength; |  | 
| 4019                     source->origFlags         = source->flags; |  | 
| 4020                     source->flags            |= UCOL_ITER_INNORMBUF; |  | 
| 4021                     source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HAS
      LEN); |  | 
| 4022 |  | 
| 4023                     return(UCOL_IGNORABLE); |  | 
| 4024                 } |  | 
| 4025             } |  | 
| 4026 |  | 
| 4027         case IMPLICIT_TAG:        /* everything that is not defined otherwise */ |  | 
| 4028             return getPrevImplicit(ch, source); |  | 
| 4029 |  | 
| 4030             // TODO: Remove CJK implicits as they are handled by the getImplicit
      Primary function |  | 
| 4031         case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
      */ |  | 
| 4032             return getPrevImplicit(ch, source); |  | 
| 4033 |  | 
| 4034         case SURROGATE_TAG:  /* This is a surrogate pair */ |  | 
| 4035             /* essentially an engaged lead surrogate. */ |  | 
| 4036             /* if you have encountered it here, it means that a */ |  | 
| 4037             /* broken sequence was encountered and this is an error */ |  | 
| 4038             return UCOL_NOT_FOUND; |  | 
| 4039 |  | 
| 4040         case LEAD_SURROGATE_TAG:  /* D800-DBFF*/ |  | 
| 4041             return UCOL_NOT_FOUND; /* broken surrogate sequence */ |  | 
| 4042 |  | 
| 4043         case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/ |  | 
| 4044             { |  | 
| 4045                 UChar32 cp = 0; |  | 
| 4046                 UChar  prevChar; |  | 
| 4047                 const UChar *prev; |  | 
| 4048                 if (isAtStartPrevIterate(source)) { |  | 
| 4049                     /* we are at the start of the string, wrong place to be at *
      / |  | 
| 4050                     return UCOL_NOT_FOUND; |  | 
| 4051                 } |  | 
| 4052                 if (source->pos != source->writableBuffer.getBuffer()) { |  | 
| 4053                     prev     = source->pos - 1; |  | 
| 4054                 } else { |  | 
| 4055                     prev     = source->fcdPosition; |  | 
| 4056                 } |  | 
| 4057                 prevChar = *prev; |  | 
| 4058 |  | 
| 4059                 /* Handles Han and Supplementary characters here.*/ |  | 
| 4060                 if (U16_IS_LEAD(prevChar)) { |  | 
| 4061                     cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<
      10UL)+0xdc00-0x10000)); |  | 
| 4062                     source->pos = prev; |  | 
| 4063                 } else { |  | 
| 4064                     return UCOL_NOT_FOUND; /* like unassigned */ |  | 
| 4065                 } |  | 
| 4066 |  | 
| 4067                 return getPrevImplicit(cp, source); |  | 
| 4068             } |  | 
| 4069 |  | 
| 4070             /* UCA is filled with these. Tailorings are NOT_FOUND */ |  | 
| 4071             /* not yet implemented */ |  | 
| 4072         case CHARSET_TAG:  /* this tag always returns */ |  | 
| 4073             /* probably after 1.8 */ |  | 
| 4074             return UCOL_NOT_FOUND; |  | 
| 4075 |  | 
| 4076         default:           /* this tag always returns */ |  | 
| 4077             *status = U_INTERNAL_PROGRAM_ERROR; |  | 
| 4078             CE=0; |  | 
| 4079             break; |  | 
| 4080         } |  | 
| 4081 |  | 
| 4082         if (CE <= UCOL_NOT_FOUND) { |  | 
| 4083             break; |  | 
| 4084         } |  | 
| 4085     } |  | 
| 4086 |  | 
| 4087     return CE; |  | 
| 4088 } |  | 
| 4089 |  | 
| 4090 /* This should really be a macro                                                
                            */ |  | 
| 4091 /* This function is used to reverse parts of a buffer. We need this operation wh
      en doing continuation */ |  | 
| 4092 /* secondaries in French                                                        
                            */ |  | 
| 4093 /* |  | 
| 4094 void uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) { |  | 
| 4095   uint8_t temp; |  | 
| 4096   while(start<end) { |  | 
| 4097     temp = *start; |  | 
| 4098     *start++ = *end; |  | 
| 4099     *end-- = temp; |  | 
| 4100   } |  | 
| 4101 } |  | 
| 4102 */ |  | 
| 4103 |  | 
/*
 * Reverses, in place, the elements between start and end (both inclusive).
 * TYPE is the element type. start and end must be modifiable pointer lvalues:
 * the macro moves start forward and end backward until they meet or cross,
 * and leaves them at those final positions.
 * Expands to a brace-enclosed block (not an expression).
 */
#define uprv_ucol_reverse_buffer(TYPE, start, end) { \
    TYPE tempA; \
    for(; (start)<(end); ++(start), --(end)) { \
        tempA = *(start); \
        *(start) = *(end); \
        *(end) = tempA; \
    } \
}
| 4112 |  | 
| 4113 /****************************************************************************/ |  | 
| 4114 /* Following are the sortkey generation functions                           */ |  | 
| 4115 /*                                                                          */ |  | 
| 4116 /****************************************************************************/ |  | 
| 4117 |  | 
| 4118 U_CAPI int32_t U_EXPORT2 | 113 U_CAPI int32_t U_EXPORT2 | 
| 4119 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, | 114 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, | 
| 4120                    const uint8_t *src2, int32_t src2Length, | 115                    const uint8_t *src2, int32_t src2Length, | 
| 4121                    uint8_t *dest, int32_t destCapacity) { | 116                    uint8_t *dest, int32_t destCapacity) { | 
| 4122     /* check arguments */ | 117     /* check arguments */ | 
| 4123     if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[sr
      c1Length-1]!=0) || | 118     if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[sr
      c1Length-1]!=0) || | 
| 4124         src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[sr
      c2Length-1]!=0) || | 119         src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[sr
      c2Length-1]!=0) || | 
| 4125         destCapacity<0 || (destCapacity>0 && dest==NULL) | 120         destCapacity<0 || (destCapacity>0 && dest==NULL) | 
| 4126     ) { | 121     ) { | 
| 4127         /* error, attempt to write a zero byte and return 0 */ | 122         /* error, attempt to write a zero byte and return 0 */ | 
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 4183         /* src1 is not finished, therefore *src2==0, and src1 is appended */ | 178         /* src1 is not finished, therefore *src2==0, and src1 is appended */ | 
| 4184         src2=src1; | 179         src2=src1; | 
| 4185     } | 180     } | 
| 4186     /* append src2, "the other, unfinished sort key" */ | 181     /* append src2, "the other, unfinished sort key" */ | 
| 4187     while((*p++=*src2++)!=0) {} | 182     while((*p++=*src2++)!=0) {} | 
| 4188 | 183 | 
| 4189     /* the actual length might be less than destLength if either sort key contai
      ned illegally embedded zero bytes */ | 184     /* the actual length might be less than destLength if either sort key contai
      ned illegally embedded zero bytes */ | 
| 4190     return (int32_t)(p-dest); | 185     return (int32_t)(p-dest); | 
| 4191 } | 186 } | 
| 4192 | 187 | 
| 4193 U_NAMESPACE_BEGIN |  | 
| 4194 |  | 
| 4195 class SortKeyByteSink : public ByteSink { |  | 
| 4196 public: |  | 
| 4197     SortKeyByteSink(char *dest, int32_t destCapacity) |  | 
| 4198             : buffer_(dest), capacity_(destCapacity), |  | 
| 4199               appended_(0) { |  | 
| 4200         if (buffer_ == NULL) { |  | 
| 4201             capacity_ = 0; |  | 
| 4202         } else if(capacity_ < 0) { |  | 
| 4203             buffer_ = NULL; |  | 
| 4204             capacity_ = 0; |  | 
| 4205         } |  | 
| 4206     } |  | 
| 4207     virtual ~SortKeyByteSink(); |  | 
| 4208 |  | 
| 4209     virtual void Append(const char *bytes, int32_t n); |  | 
| 4210     void Append(uint32_t b) { |  | 
| 4211         if (appended_ < capacity_ || Resize(1, appended_)) { |  | 
| 4212             buffer_[appended_] = (char)b; |  | 
| 4213         } |  | 
| 4214         ++appended_; |  | 
| 4215     } |  | 
| 4216     void Append(uint32_t b1, uint32_t b2) { |  | 
| 4217         int32_t a2 = appended_ + 2; |  | 
| 4218         if (a2 <= capacity_ || Resize(2, appended_)) { |  | 
| 4219             buffer_[appended_] = (char)b1; |  | 
| 4220             buffer_[appended_ + 1] = (char)b2; |  | 
| 4221         } else if(appended_ < capacity_) { |  | 
| 4222             buffer_[appended_] = (char)b1; |  | 
| 4223         } |  | 
| 4224         appended_ = a2; |  | 
| 4225     } |  | 
| 4226     virtual char *GetAppendBuffer(int32_t min_capacity, |  | 
| 4227                                   int32_t desired_capacity_hint, |  | 
| 4228                                   char *scratch, int32_t scratch_capacity, |  | 
| 4229                                   int32_t *result_capacity); |  | 
| 4230     int32_t NumberOfBytesAppended() const { return appended_; } |  | 
| 4231     /** @return FALSE if memory allocation failed */ |  | 
| 4232     UBool IsOk() const { return buffer_ != NULL; } |  | 
| 4233 |  | 
| 4234 protected: |  | 
| 4235     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t leng
      th) = 0; |  | 
| 4236     virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; |  | 
| 4237 |  | 
| 4238     void SetNotOk() { |  | 
| 4239         buffer_ = NULL; |  | 
| 4240         capacity_ = 0; |  | 
| 4241     } |  | 
| 4242 |  | 
| 4243     char *buffer_; |  | 
| 4244     int32_t capacity_; |  | 
| 4245     int32_t appended_; |  | 
| 4246 |  | 
| 4247 private: |  | 
| 4248     SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemente
      d |  | 
| 4249     SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator 
      not implemented |  | 
| 4250 }; |  | 
| 4251 |  | 
| 4252 SortKeyByteSink::~SortKeyByteSink() {} |  | 
| 4253 |  | 
| 4254 void |  | 
| 4255 SortKeyByteSink::Append(const char *bytes, int32_t n) { |  | 
| 4256     if (n <= 0 || bytes == NULL) { |  | 
| 4257         return; |  | 
| 4258     } |  | 
| 4259     int32_t length = appended_; |  | 
| 4260     appended_ += n; |  | 
| 4261     if ((buffer_ + length) == bytes) { |  | 
| 4262         return;  // the caller used GetAppendBuffer() and wrote the bytes alread
      y |  | 
| 4263     } |  | 
| 4264     int32_t available = capacity_ - length; |  | 
| 4265     if (n <= available) { |  | 
| 4266         uprv_memcpy(buffer_ + length, bytes, n); |  | 
| 4267     } else { |  | 
| 4268         AppendBeyondCapacity(bytes, n, length); |  | 
| 4269     } |  | 
| 4270 } |  | 
| 4271 |  | 
| 4272 char * |  | 
| 4273 SortKeyByteSink::GetAppendBuffer(int32_t min_capacity, |  | 
| 4274                                  int32_t desired_capacity_hint, |  | 
| 4275                                  char *scratch, |  | 
| 4276                                  int32_t scratch_capacity, |  | 
| 4277                                  int32_t *result_capacity) { |  | 
| 4278     if (min_capacity < 1 || scratch_capacity < min_capacity) { |  | 
| 4279         *result_capacity = 0; |  | 
| 4280         return NULL; |  | 
| 4281     } |  | 
| 4282     int32_t available = capacity_ - appended_; |  | 
| 4283     if (available >= min_capacity) { |  | 
| 4284         *result_capacity = available; |  | 
| 4285         return buffer_ + appended_; |  | 
| 4286     } else if (Resize(desired_capacity_hint, appended_)) { |  | 
| 4287         *result_capacity = capacity_ - appended_; |  | 
| 4288         return buffer_ + appended_; |  | 
| 4289     } else { |  | 
| 4290         *result_capacity = scratch_capacity; |  | 
| 4291         return scratch; |  | 
| 4292     } |  | 
| 4293 } |  | 
| 4294 |  | 
| 4295 class FixedSortKeyByteSink : public SortKeyByteSink { |  | 
| 4296 public: |  | 
| 4297     FixedSortKeyByteSink(char *dest, int32_t destCapacity) |  | 
| 4298             : SortKeyByteSink(dest, destCapacity) {} |  | 
| 4299     virtual ~FixedSortKeyByteSink(); |  | 
| 4300 |  | 
| 4301 private: |  | 
| 4302     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t leng
      th); |  | 
| 4303     virtual UBool Resize(int32_t appendCapacity, int32_t length); |  | 
| 4304 }; |  | 
| 4305 |  | 
| 4306 FixedSortKeyByteSink::~FixedSortKeyByteSink() {} |  | 
| 4307 |  | 
| 4308 void |  | 
| 4309 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int
      32_t length) { |  | 
| 4310     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_ |  | 
| 4311     // Fill the buffer completely. |  | 
| 4312     int32_t available = capacity_ - length; |  | 
| 4313     if (available > 0) { |  | 
| 4314         uprv_memcpy(buffer_ + length, bytes, available); |  | 
| 4315     } |  | 
| 4316 } |  | 
| 4317 |  | 
| 4318 UBool |  | 
| 4319 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) { |  | 
| 4320     return FALSE; |  | 
| 4321 } |  | 
| 4322 |  | 
| 4323 class CollationKeyByteSink : public SortKeyByteSink { |  | 
| 4324 public: |  | 
| 4325     CollationKeyByteSink(CollationKey &key) |  | 
| 4326             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getC
      apacity()), |  | 
| 4327               key_(key) {} |  | 
| 4328     virtual ~CollationKeyByteSink(); |  | 
| 4329 |  | 
| 4330 private: |  | 
| 4331     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t leng
      th); |  | 
| 4332     virtual UBool Resize(int32_t appendCapacity, int32_t length); |  | 
| 4333 |  | 
| 4334     CollationKey &key_; |  | 
| 4335 }; |  | 
| 4336 |  | 
| 4337 CollationKeyByteSink::~CollationKeyByteSink() {} |  | 
| 4338 |  | 
| 4339 void |  | 
| 4340 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t
       length) { |  | 
| 4341     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_ |  | 
| 4342     if (Resize(n, length)) { |  | 
| 4343         uprv_memcpy(buffer_ + length, bytes, n); |  | 
| 4344     } |  | 
| 4345 } |  | 
| 4346 |  | 
| 4347 UBool |  | 
| 4348 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) { |  | 
| 4349     if (buffer_ == NULL) { |  | 
| 4350         return FALSE;  // allocation failed before already |  | 
| 4351     } |  | 
| 4352     int32_t newCapacity = 2 * capacity_; |  | 
| 4353     int32_t altCapacity = length + 2 * appendCapacity; |  | 
| 4354     if (newCapacity < altCapacity) { |  | 
| 4355         newCapacity = altCapacity; |  | 
| 4356     } |  | 
| 4357     if (newCapacity < 200) { |  | 
| 4358         newCapacity = 200; |  | 
| 4359     } |  | 
| 4360     uint8_t *newBuffer = key_.reallocate(newCapacity, length); |  | 
| 4361     if (newBuffer == NULL) { |  | 
| 4362         SetNotOk(); |  | 
| 4363         return FALSE; |  | 
| 4364     } |  | 
| 4365     buffer_ = reinterpret_cast<char *>(newBuffer); |  | 
| 4366     capacity_ = newCapacity; |  | 
| 4367     return TRUE; |  | 
| 4368 } |  | 
| 4369 |  | 
| 4370 /** |  | 
| 4371  * uint8_t byte buffer, similar to CharString but simpler. |  | 
| 4372  */ |  | 
| 4373 class SortKeyLevel : public UMemory { |  | 
| 4374 public: |  | 
| 4375     SortKeyLevel() : len(0), ok(TRUE) {} |  | 
| 4376     ~SortKeyLevel() {} |  | 
| 4377 |  | 
| 4378     /** @return FALSE if memory allocation failed */ |  | 
| 4379     UBool isOk() const { return ok; } |  | 
| 4380     UBool isEmpty() const { return len == 0; } |  | 
| 4381     int32_t length() const { return len; } |  | 
| 4382     const uint8_t *data() const { return buffer.getAlias(); } |  | 
| 4383     uint8_t operator[](int32_t index) const { return buffer[index]; } |  | 
| 4384 |  | 
| 4385     void appendByte(uint32_t b); |  | 
| 4386 |  | 
| 4387     void appendTo(ByteSink &sink) const { |  | 
| 4388         sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len); |  | 
| 4389     } |  | 
| 4390 |  | 
| 4391     uint8_t &lastByte() { |  | 
| 4392         U_ASSERT(len > 0); |  | 
| 4393         return buffer[len - 1]; |  | 
| 4394     } |  | 
| 4395 |  | 
| 4396     uint8_t *getLastFewBytes(int32_t n) { |  | 
| 4397         if (ok && len >= n) { |  | 
| 4398             return buffer.getAlias() + len - n; |  | 
| 4399         } else { |  | 
| 4400             return NULL; |  | 
| 4401         } |  | 
| 4402     } |  | 
| 4403 |  | 
| 4404 private: |  | 
| 4405     MaybeStackArray<uint8_t, 40> buffer; |  | 
| 4406     int32_t len; |  | 
| 4407     UBool ok; |  | 
| 4408 |  | 
| 4409     UBool ensureCapacity(int32_t appendCapacity); |  | 
| 4410 |  | 
| 4411     SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class |  | 
| 4412     SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of thi
      s class |  | 
| 4413 }; |  | 
| 4414 |  | 
| 4415 void SortKeyLevel::appendByte(uint32_t b) { |  | 
| 4416     if(len < buffer.getCapacity() || ensureCapacity(1)) { |  | 
| 4417         buffer[len++] = (uint8_t)b; |  | 
| 4418     } |  | 
| 4419 } |  | 
| 4420 |  | 
| 4421 UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) { |  | 
| 4422     if(!ok) { |  | 
| 4423         return FALSE; |  | 
| 4424     } |  | 
| 4425     int32_t newCapacity = 2 * buffer.getCapacity(); |  | 
| 4426     int32_t altCapacity = len + 2 * appendCapacity; |  | 
| 4427     if (newCapacity < altCapacity) { |  | 
| 4428         newCapacity = altCapacity; |  | 
| 4429     } |  | 
| 4430     if (newCapacity < 200) { |  | 
| 4431         newCapacity = 200; |  | 
| 4432     } |  | 
| 4433     if(buffer.resize(newCapacity, len)==NULL) { |  | 
| 4434         return ok = FALSE; |  | 
| 4435     } |  | 
| 4436     return TRUE; |  | 
| 4437 } |  | 
| 4438 |  | 
| 4439 U_NAMESPACE_END |  | 
| 4440 |  | 
| 4441 /* sortkey API */ |  | 
| 4442 U_CAPI int32_t U_EXPORT2 | 188 U_CAPI int32_t U_EXPORT2 | 
| 4443 ucol_getSortKey(const    UCollator    *coll, | 189 ucol_getSortKey(const    UCollator    *coll, | 
| 4444         const    UChar        *source, | 190         const    UChar        *source, | 
| 4445         int32_t        sourceLength, | 191         int32_t        sourceLength, | 
| 4446         uint8_t        *result, | 192         uint8_t        *result, | 
| 4447         int32_t        resultLength) | 193         int32_t        resultLength) | 
| 4448 { | 194 { | 
| 4449     UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); | 195     UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); | 
| 4450     if (UTRACE_LEVEL(UTRACE_VERBOSE)) { | 196     if (UTRACE_LEVEL(UTRACE_VERBOSE)) { | 
| 4451         UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, sour
      ce, | 197         UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, sour
      ce, | 
| 4452             ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLengt
      h)); | 198             ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLengt
      h)); | 
| 4453     } | 199     } | 
| 4454 | 200 | 
| 4455     if(coll->delegate != NULL) { | 201     int32_t keySize = Collator::fromUCollator(coll)-> | 
| 4456       return ((const Collator*)coll->delegate)->getSortKey(source, sourceLength,
       result, resultLength); | 202             getSortKey(source, sourceLength, result, resultLength); | 
| 4457     } | 203 | 
| 4458 |  | 
| 4459     UErrorCode status = U_ZERO_ERROR; |  | 
| 4460     int32_t keySize   = 0; |  | 
| 4461 |  | 
| 4462     if(source != NULL) { |  | 
| 4463         // source == NULL is actually an error situation, but we would need to |  | 
| 4464         // have an error code to return it. Until we introduce a new |  | 
| 4465         // API, it stays like this |  | 
| 4466 |  | 
| 4467         /* this uses the function pointer that is set in updateinternalstate */ |  | 
| 4468         /* currently, there are two funcs: */ |  | 
| 4469         /*ucol_calcSortKey(...);*/ |  | 
| 4470         /*ucol_calcSortKeySimpleTertiary(...);*/ |  | 
| 4471 |  | 
| 4472         uint8_t noDest[1] = { 0 }; |  | 
| 4473         if(result == NULL) { |  | 
| 4474             // Distinguish pure preflighting from an allocation error. |  | 
| 4475             result = noDest; |  | 
| 4476             resultLength = 0; |  | 
| 4477         } |  | 
| 4478         FixedSortKeyByteSink sink(reinterpret_cast<char *>(result), resultLength
      ); |  | 
| 4479         coll->sortKeyGen(coll, source, sourceLength, sink, &status); |  | 
| 4480         if(U_SUCCESS(status)) { |  | 
| 4481             keySize = sink.NumberOfBytesAppended(); |  | 
| 4482         } |  | 
| 4483     } |  | 
| 4484     UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); | 204     UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); | 
| 4485     UTRACE_EXIT_STATUS(status); | 205     UTRACE_EXIT_VALUE(keySize); | 
| 4486     return keySize; | 206     return keySize; | 
| 4487 } | 207 } | 
| 4488 | 208 | 
| 4489 U_CFUNC int32_t |  | 
| 4490 ucol_getCollationKey(const UCollator *coll, |  | 
| 4491                      const UChar *source, int32_t sourceLength, |  | 
| 4492                      CollationKey &key, |  | 
| 4493                      UErrorCode &errorCode) { |  | 
| 4494     CollationKeyByteSink sink(key); |  | 
| 4495     coll->sortKeyGen(coll, source, sourceLength, sink, &errorCode); |  | 
| 4496     return sink.NumberOfBytesAppended(); |  | 
| 4497 } |  | 
| 4498 |  | 
| 4499 // Is this primary weight compressible? |  | 
| 4500 // Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit). |  | 
| 4501 // TODO: This should use per-lead-byte flags from FractionalUCA.txt. |  | 
| 4502 static inline UBool |  | 
| 4503 isCompressible(const UCollator * /*coll*/, uint8_t primary1) { |  | 
| 4504     return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegul
      arPrimary; |  | 
| 4505 } |  | 
| 4506 |  | 
| 4507 static |  | 
| 4508 inline void doCaseShift(SortKeyLevel &cases, uint32_t &caseShift) { |  | 
| 4509     if (caseShift  == 0) { |  | 
| 4510         cases.appendByte(UCOL_CASE_BYTE_START); |  | 
| 4511         caseShift = UCOL_CASE_SHIFT_START; |  | 
| 4512     } |  | 
| 4513 } |  | 
| 4514 |  | 
| 4515 // Packs the secondary buffer when processing French locale. |  | 
| 4516 static void |  | 
| 4517 packFrench(const uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result)
       { |  | 
| 4518     secondaries += secsize;  // We read the secondary-level bytes back to front. |  | 
| 4519     uint8_t secondary; |  | 
| 4520     int32_t count2 = 0; |  | 
| 4521     int32_t i = 0; |  | 
| 4522     // we use i here since the key size already accounts for terminators, so we'
      ll discard the increment |  | 
| 4523     for(i = 0; i<secsize; i++) { |  | 
| 4524         secondary = *(secondaries-i-1); |  | 
| 4525         /* This is compression code. */ |  | 
| 4526         if (secondary == UCOL_COMMON2) { |  | 
| 4527             ++count2; |  | 
| 4528         } else { |  | 
| 4529             if (count2 > 0) { |  | 
| 4530                 if (secondary > UCOL_COMMON2) { // not necessary for 4th level. |  | 
| 4531                     while (count2 > UCOL_TOP_COUNT2) { |  | 
| 4532                         result.Append(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2); |  | 
| 4533                         count2 -= (uint32_t)UCOL_TOP_COUNT2; |  | 
| 4534                     } |  | 
| 4535                     result.Append(UCOL_COMMON_TOP2 - (count2-1)); |  | 
| 4536                 } else { |  | 
| 4537                     while (count2 > UCOL_BOT_COUNT2) { |  | 
| 4538                         result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); |  | 
| 4539                         count2 -= (uint32_t)UCOL_BOT_COUNT2; |  | 
| 4540                     } |  | 
| 4541                     result.Append(UCOL_COMMON_BOT2 + (count2-1)); |  | 
| 4542                 } |  | 
| 4543                 count2 = 0; |  | 
| 4544             } |  | 
| 4545             result.Append(secondary); |  | 
| 4546         } |  | 
| 4547     } |  | 
| 4548     if (count2 > 0) { |  | 
| 4549         while (count2 > UCOL_BOT_COUNT2) { |  | 
| 4550             result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); |  | 
| 4551             count2 -= (uint32_t)UCOL_BOT_COUNT2; |  | 
| 4552         } |  | 
| 4553         result.Append(UCOL_COMMON_BOT2 + (count2-1)); |  | 
| 4554     } |  | 
| 4555 } |  | 
| 4556 |  | 
| 4557 #define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0 |  | 
| 4558 |  | 
| 4559 /* This is the sortkey work horse function */ |  | 
| 4560 U_CFUNC void U_CALLCONV |  | 
| 4561 ucol_calcSortKey(const    UCollator    *coll, |  | 
| 4562         const    UChar        *source, |  | 
| 4563         int32_t        sourceLength, |  | 
| 4564         SortKeyByteSink &result, |  | 
| 4565         UErrorCode *status) |  | 
| 4566 { |  | 
| 4567     if(U_FAILURE(*status)) { |  | 
| 4568         return; |  | 
| 4569     } |  | 
| 4570 |  | 
| 4571     SortKeyByteSink &primaries = result; |  | 
| 4572     SortKeyLevel secondaries; |  | 
| 4573     SortKeyLevel tertiaries; |  | 
| 4574     SortKeyLevel cases; |  | 
| 4575     SortKeyLevel quads; |  | 
| 4576 |  | 
| 4577     UnicodeString normSource; |  | 
| 4578 |  | 
| 4579     int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength); |  | 
| 4580 |  | 
| 4581     UColAttributeValue strength = coll->strength; |  | 
| 4582 |  | 
| 4583     uint8_t compareSec   = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF); |  | 
| 4584     uint8_t compareTer   = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF); |  | 
| 4585     uint8_t compareQuad  = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF); |  | 
| 4586     UBool  compareIdent = (strength == UCOL_IDENTICAL); |  | 
| 4587     UBool  doCase = (coll->caseLevel == UCOL_ON); |  | 
| 4588     UBool  isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0)
      ; |  | 
| 4589     UBool  shifted = (coll->alternateHandling == UCOL_SHIFTED); |  | 
| 4590     //UBool  qShifted = shifted && (compareQuad == 0); |  | 
| 4591     UBool  doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0); |  | 
| 4592 |  | 
| 4593     uint32_t variableTopValue = coll->variableTopValue; |  | 
| 4594     // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no |  | 
| 4595     // qShifted, we don't need to set UCOL_COMMON_BOT4 so high. |  | 
| 4596     uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1); |  | 
| 4597     uint8_t UCOL_HIRAGANA_QUAD = 0; |  | 
| 4598     if(doHiragana) { |  | 
| 4599         UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++; |  | 
| 4600         /* allocate one more space for hiragana, value for hiragana */ |  | 
| 4601     } |  | 
| 4602     uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4); |  | 
| 4603 |  | 
| 4604     /* support for special features like caselevel and funky secondaries */ |  | 
| 4605     int32_t lastSecondaryLength = 0; |  | 
| 4606     uint32_t caseShift = 0; |  | 
| 4607 |  | 
| 4608     /* If we need to normalize, we'll do it all at once at the beginning! */ |  | 
| 4609     const Normalizer2 *norm2; |  | 
| 4610     if(compareIdent) { |  | 
| 4611         norm2 = Normalizer2Factory::getNFDInstance(*status); |  | 
| 4612     } else if(coll->normalizationMode != UCOL_OFF) { |  | 
| 4613         norm2 = Normalizer2Factory::getFCDInstance(*status); |  | 
| 4614     } else { |  | 
| 4615         norm2 = NULL; |  | 
| 4616     } |  | 
| 4617     if(norm2 != NULL) { |  | 
| 4618         normSource.setTo(FALSE, source, len); |  | 
| 4619         int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); |  | 
| 4620         if(qcYesLength != len) { |  | 
| 4621             UnicodeString unnormalized = normSource.tempSubString(qcYesLength); |  | 
| 4622             normSource.truncate(qcYesLength); |  | 
| 4623             norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); |  | 
| 4624             source = normSource.getBuffer(); |  | 
| 4625             len = normSource.length(); |  | 
| 4626         } |  | 
| 4627     } |  | 
| 4628     collIterate s; |  | 
| 4629     IInit_collIterate(coll, source, len, &s, status); |  | 
| 4630     if(U_FAILURE(*status)) { |  | 
| 4631         return; |  | 
| 4632     } |  | 
| 4633     s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was norma
      lized. |  | 
| 4634 |  | 
| 4635     uint32_t order = 0; |  | 
| 4636 |  | 
| 4637     uint8_t primary1 = 0; |  | 
| 4638     uint8_t primary2 = 0; |  | 
| 4639     uint8_t secondary = 0; |  | 
| 4640     uint8_t tertiary = 0; |  | 
| 4641     uint8_t caseSwitch = coll->caseSwitch; |  | 
| 4642     uint8_t tertiaryMask = coll->tertiaryMask; |  | 
| 4643     int8_t tertiaryAddition = coll->tertiaryAddition; |  | 
| 4644     uint8_t tertiaryTop = coll->tertiaryTop; |  | 
| 4645     uint8_t tertiaryBottom = coll->tertiaryBottom; |  | 
| 4646     uint8_t tertiaryCommon = coll->tertiaryCommon; |  | 
| 4647     uint8_t caseBits = 0; |  | 
| 4648 |  | 
| 4649     UBool wasShifted = FALSE; |  | 
| 4650     UBool notIsContinuation = FALSE; |  | 
| 4651 |  | 
| 4652     uint32_t count2 = 0, count3 = 0, count4 = 0; |  | 
| 4653     uint8_t leadPrimary = 0; |  | 
| 4654 |  | 
| 4655     for(;;) { |  | 
| 4656         order = ucol_IGetNextCE(coll, &s, status); |  | 
| 4657         if(order == UCOL_NO_MORE_CES) { |  | 
| 4658             break; |  | 
| 4659         } |  | 
| 4660 |  | 
| 4661         if(order == 0) { |  | 
| 4662             continue; |  | 
| 4663         } |  | 
| 4664 |  | 
| 4665         notIsContinuation = !isContinuation(order); |  | 
| 4666 |  | 
| 4667         if(notIsContinuation) { |  | 
| 4668             tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK); |  | 
| 4669         } else { |  | 
| 4670             tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION)); |  | 
| 4671         } |  | 
| 4672 |  | 
| 4673         secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); |  | 
| 4674         primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); |  | 
| 4675         primary1 = (uint8_t)(order >> 8); |  | 
| 4676 |  | 
| 4677         uint8_t originalPrimary1 = primary1; |  | 
| 4678         if(notIsContinuation && coll->leadBytePermutationTable != NULL) { |  | 
| 4679             primary1 = coll->leadBytePermutationTable[primary1]; |  | 
| 4680         } |  | 
| 4681 |  | 
| 4682         if((shifted && ((notIsContinuation && order <= variableTopValue && prima
      ry1 > 0) |  | 
| 4683                         || (!notIsContinuation && wasShifted))) |  | 
| 4684             || (wasShifted && primary1 == 0)) /* amendment to the UCA says that 
      primary ignorables */ |  | 
| 4685         { |  | 
| 4686             /* and other ignorables should be removed if following a shifted cod
      e point */ |  | 
| 4687             if(primary1 == 0) { /* if we were shifted and we got an ignorable co
      de point */ |  | 
| 4688                 /* we should just completely ignore it */ |  | 
| 4689                 continue; |  | 
| 4690             } |  | 
| 4691             if(compareQuad == 0) { |  | 
| 4692                 if(count4 > 0) { |  | 
| 4693                     while (count4 > UCOL_BOT_COUNT4) { |  | 
| 4694                         quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); |  | 
| 4695                         count4 -= UCOL_BOT_COUNT4; |  | 
| 4696                     } |  | 
| 4697                     quads.appendByte(UCOL_COMMON_BOT4 + (count4-1)); |  | 
| 4698                     count4 = 0; |  | 
| 4699                 } |  | 
| 4700                 /* We are dealing with a variable and we're treating them as shi
      fted */ |  | 
| 4701                 /* This is a shifted ignorable */ |  | 
| 4702                 if(primary1 != 0) { /* we need to check this since we could be i
      n continuation */ |  | 
| 4703                     quads.appendByte(primary1); |  | 
| 4704                 } |  | 
| 4705                 if(primary2 != 0) { |  | 
| 4706                     quads.appendByte(primary2); |  | 
| 4707                 } |  | 
| 4708             } |  | 
| 4709             wasShifted = TRUE; |  | 
| 4710         } else { |  | 
| 4711             wasShifted = FALSE; |  | 
| 4712             /* Note: This code assumes that the table is well built i.e. not hav
      ing 0 bytes where they are not supposed to be. */ |  | 
| 4713             /* Usually, we'll have non-zero primary1 & primary2, except in cases
       of a-z and friends, when primary2 will   */ |  | 
| 4714             /* regular and simple sortkey calc */ |  | 
| 4715             if(primary1 != UCOL_IGNORABLE) { |  | 
| 4716                 if(notIsContinuation) { |  | 
| 4717                     if(leadPrimary == primary1) { |  | 
| 4718                         primaries.Append(primary2); |  | 
| 4719                     } else { |  | 
| 4720                         if(leadPrimary != 0) { |  | 
| 4721                             primaries.Append((primary1 > leadPrimary) ? UCOL_BYT
      E_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN); |  | 
| 4722                         } |  | 
| 4723                         if(primary2 == UCOL_IGNORABLE) { |  | 
| 4724                             /* one byter, not compressed */ |  | 
| 4725                             primaries.Append(primary1); |  | 
| 4726                             leadPrimary = 0; |  | 
| 4727                         } else if(isCompressible(coll, originalPrimary1)) { |  | 
| 4728                             /* compress */ |  | 
| 4729                             primaries.Append(leadPrimary = primary1, primary2); |  | 
| 4730                         } else { |  | 
| 4731                             leadPrimary = 0; |  | 
| 4732                             primaries.Append(primary1, primary2); |  | 
| 4733                         } |  | 
| 4734                     } |  | 
| 4735                 } else { /* we are in continuation, so we're gonna add primary t
      o the key don't care about compression */ |  | 
| 4736                     if(primary2 == UCOL_IGNORABLE) { |  | 
| 4737                         primaries.Append(primary1); |  | 
| 4738                     } else { |  | 
| 4739                         primaries.Append(primary1, primary2); |  | 
| 4740                     } |  | 
| 4741                 } |  | 
| 4742             } |  | 
| 4743 |  | 
| 4744             if(secondary > compareSec) { |  | 
| 4745                 if(!isFrenchSec) { |  | 
| 4746                     /* This is compression code. */ |  | 
| 4747                     if (secondary == UCOL_COMMON2 && notIsContinuation) { |  | 
| 4748                         ++count2; |  | 
| 4749                     } else { |  | 
| 4750                         if (count2 > 0) { |  | 
| 4751                             if (secondary > UCOL_COMMON2) { // not necessary for
       4th level. |  | 
| 4752                                 while (count2 > UCOL_TOP_COUNT2) { |  | 
| 4753                                     secondaries.appendByte(UCOL_COMMON_TOP2 - UC
      OL_TOP_COUNT2); |  | 
| 4754                                     count2 -= (uint32_t)UCOL_TOP_COUNT2; |  | 
| 4755                                 } |  | 
| 4756                                 secondaries.appendByte(UCOL_COMMON_TOP2 - (count
      2-1)); |  | 
| 4757                             } else { |  | 
| 4758                                 while (count2 > UCOL_BOT_COUNT2) { |  | 
| 4759                                     secondaries.appendByte(UCOL_COMMON_BOT2 + UC
      OL_BOT_COUNT2); |  | 
| 4760                                     count2 -= (uint32_t)UCOL_BOT_COUNT2; |  | 
| 4761                                 } |  | 
| 4762                                 secondaries.appendByte(UCOL_COMMON_BOT2 + (count
      2-1)); |  | 
| 4763                             } |  | 
| 4764                             count2 = 0; |  | 
| 4765                         } |  | 
| 4766                         secondaries.appendByte(secondary); |  | 
| 4767                     } |  | 
| 4768                 } else { |  | 
| 4769                     /* Do the special handling for French secondaries */ |  | 
| 4770                     /* We need to get continuation elements and do intermediate 
      restore */ |  | 
| 4771                     /* abc1c2c3de with french secondaries need to be edc1c2c3ba 
      NOT edc3c2c1ba */ |  | 
| 4772                     if(notIsContinuation) { |  | 
| 4773                         if (lastSecondaryLength > 1) { |  | 
| 4774                             uint8_t *frenchStartPtr = secondaries.getLastFewByte
      s(lastSecondaryLength); |  | 
| 4775                             if (frenchStartPtr != NULL) { |  | 
| 4776                                 /* reverse secondaries from frenchStartPtr up to
       frenchEndPtr */ |  | 
| 4777                                 uint8_t *frenchEndPtr = frenchStartPtr + lastSec
      ondaryLength - 1; |  | 
| 4778                                 uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr
      , frenchEndPtr); |  | 
| 4779                             } |  | 
| 4780                         } |  | 
| 4781                         lastSecondaryLength = 1; |  | 
| 4782                     } else { |  | 
| 4783                         ++lastSecondaryLength; |  | 
| 4784                     } |  | 
| 4785                     secondaries.appendByte(secondary); |  | 
| 4786                 } |  | 
| 4787             } |  | 
| 4788 |  | 
| 4789             if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) { |  | 
| 4790                 // do the case level if we need to do it. We don't want to calcu
      late |  | 
| 4791                 // case level for primary ignorables if we have only primary str
      ength and case level |  | 
| 4792                 // otherwise we would break well formedness of CEs |  | 
| 4793                 doCaseShift(cases, caseShift); |  | 
| 4794                 if(notIsContinuation) { |  | 
| 4795                     caseBits = (uint8_t)(tertiary & 0xC0); |  | 
| 4796 |  | 
| 4797                     if(tertiary != 0) { |  | 
| 4798                         if(coll->caseFirst == UCOL_UPPER_FIRST) { |  | 
| 4799                             if((caseBits & 0xC0) == 0) { |  | 
| 4800                                 cases.lastByte() |= 1 << (--caseShift); |  | 
| 4801                             } else { |  | 
| 4802                                 cases.lastByte() |= 0 << (--caseShift); |  | 
| 4803                                 /* second bit */ |  | 
| 4804                                 doCaseShift(cases, caseShift); |  | 
| 4805                                 cases.lastByte() |= ((caseBits>>6)&1) << (--case
      Shift); |  | 
| 4806                             } |  | 
| 4807                         } else { |  | 
| 4808                             if((caseBits & 0xC0) == 0) { |  | 
| 4809                                 cases.lastByte() |= 0 << (--caseShift); |  | 
| 4810                             } else { |  | 
| 4811                                 cases.lastByte() |= 1 << (--caseShift); |  | 
| 4812                                 /* second bit */ |  | 
| 4813                                 doCaseShift(cases, caseShift); |  | 
| 4814                                 cases.lastByte() |= ((caseBits>>7)&1) << (--case
      Shift); |  | 
| 4815                             } |  | 
| 4816                         } |  | 
| 4817                     } |  | 
| 4818                 } |  | 
| 4819             } else { |  | 
| 4820                 if(notIsContinuation) { |  | 
| 4821                     tertiary ^= caseSwitch; |  | 
| 4822                 } |  | 
| 4823             } |  | 
| 4824 |  | 
| 4825             tertiary &= tertiaryMask; |  | 
| 4826             if(tertiary > compareTer) { |  | 
| 4827                 /* This is compression code. */ |  | 
| 4828                 /* sequence size check is included in the if clause */ |  | 
| 4829                 if (tertiary == tertiaryCommon && notIsContinuation) { |  | 
| 4830                     ++count3; |  | 
| 4831                 } else { |  | 
| 4832                     if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMO
      N3_NORMAL) { |  | 
| 4833                         tertiary += tertiaryAddition; |  | 
| 4834                     } else if(tertiary <= tertiaryCommon && tertiaryCommon == UC
      OL_COMMON3_UPPERFIRST) { |  | 
| 4835                         tertiary -= tertiaryAddition; |  | 
| 4836                     } |  | 
| 4837                     if (count3 > 0) { |  | 
| 4838                         if ((tertiary > tertiaryCommon)) { |  | 
| 4839                             while (count3 > coll->tertiaryTopCount) { |  | 
| 4840                                 tertiaries.appendByte(tertiaryTop - coll->tertia
      ryTopCount); |  | 
| 4841                                 count3 -= (uint32_t)coll->tertiaryTopCount; |  | 
| 4842                             } |  | 
| 4843                             tertiaries.appendByte(tertiaryTop - (count3-1)); |  | 
| 4844                         } else { |  | 
| 4845                             while (count3 > coll->tertiaryBottomCount) { |  | 
| 4846                                 tertiaries.appendByte(tertiaryBottom + coll->ter
      tiaryBottomCount); |  | 
| 4847                                 count3 -= (uint32_t)coll->tertiaryBottomCount; |  | 
| 4848                             } |  | 
| 4849                             tertiaries.appendByte(tertiaryBottom + (count3-1)); |  | 
| 4850                         } |  | 
| 4851                         count3 = 0; |  | 
| 4852                     } |  | 
| 4853                     tertiaries.appendByte(tertiary); |  | 
| 4854                 } |  | 
| 4855             } |  | 
| 4856 |  | 
| 4857             if(/*qShifted*/(compareQuad==0)  && notIsContinuation) { |  | 
| 4858                 if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we ne
      ed to note it |  | 
| 4859                     if(count4>0) { // Close this part |  | 
| 4860                         while (count4 > UCOL_BOT_COUNT4) { |  | 
| 4861                             quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4)
      ; |  | 
| 4862                             count4 -= UCOL_BOT_COUNT4; |  | 
| 4863                         } |  | 
| 4864                         quads.appendByte(UCOL_COMMON_BOT4 + (count4-1)); |  | 
| 4865                         count4 = 0; |  | 
| 4866                     } |  | 
| 4867                     quads.appendByte(UCOL_HIRAGANA_QUAD); // Add the Hiragana |  | 
| 4868                 } else { // This wasn't Hiragana, so we can continue adding stuf
      f |  | 
| 4869                     count4++; |  | 
| 4870                 } |  | 
| 4871             } |  | 
| 4872         } |  | 
| 4873     } |  | 
| 4874 |  | 
| 4875     /* Here, we are generally done with processing */ |  | 
| 4876     /* bailing out would not be too productive */ |  | 
| 4877 |  | 
| 4878     UBool ok = TRUE; |  | 
| 4879     if(U_SUCCESS(*status)) { |  | 
| 4880         /* we have done all the CE's, now let's put them together to form a key 
      */ |  | 
| 4881         if(compareSec == 0) { |  | 
| 4882             if (count2 > 0) { |  | 
| 4883                 while (count2 > UCOL_BOT_COUNT2) { |  | 
| 4884                     secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); |  | 
| 4885                     count2 -= (uint32_t)UCOL_BOT_COUNT2; |  | 
| 4886                 } |  | 
| 4887                 secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1)); |  | 
| 4888             } |  | 
| 4889             result.Append(UCOL_LEVELTERMINATOR); |  | 
| 4890             if(!secondaries.isOk()) { |  | 
| 4891                 ok = FALSE; |  | 
| 4892             } else if(!isFrenchSec) { |  | 
| 4893                 secondaries.appendTo(result); |  | 
| 4894             } else { |  | 
| 4895                 // If there are any unresolved continuation secondaries, |  | 
| 4896                 // reverse them here so that we can reverse the whole secondary 
      thing. |  | 
| 4897                 if (lastSecondaryLength > 1) { |  | 
| 4898                     uint8_t *frenchStartPtr = secondaries.getLastFewBytes(lastSe
      condaryLength); |  | 
| 4899                     if (frenchStartPtr != NULL) { |  | 
| 4900                         /* reverse secondaries from frenchStartPtr up to frenchE
      ndPtr */ |  | 
| 4901                         uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLe
      ngth - 1; |  | 
| 4902                         uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, french
      EndPtr); |  | 
| 4903                     } |  | 
| 4904                 } |  | 
| 4905                 packFrench(secondaries.data(), secondaries.length(), result); |  | 
| 4906             } |  | 
| 4907         } |  | 
| 4908 |  | 
| 4909         if(doCase) { |  | 
| 4910             ok &= cases.isOk(); |  | 
| 4911             result.Append(UCOL_LEVELTERMINATOR); |  | 
| 4912             cases.appendTo(result); |  | 
| 4913         } |  | 
| 4914 |  | 
| 4915         if(compareTer == 0) { |  | 
| 4916             if (count3 > 0) { |  | 
| 4917                 if (coll->tertiaryCommon != UCOL_COMMON_BOT3) { |  | 
| 4918                     while (count3 >= coll->tertiaryTopCount) { |  | 
| 4919                         tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCou
      nt); |  | 
| 4920                         count3 -= (uint32_t)coll->tertiaryTopCount; |  | 
| 4921                     } |  | 
| 4922                     tertiaries.appendByte(tertiaryTop - count3); |  | 
| 4923                 } else { |  | 
| 4924                     while (count3 > coll->tertiaryBottomCount) { |  | 
| 4925                         tertiaries.appendByte(tertiaryBottom + coll->tertiaryBot
      tomCount); |  | 
| 4926                         count3 -= (uint32_t)coll->tertiaryBottomCount; |  | 
| 4927                     } |  | 
| 4928                     tertiaries.appendByte(tertiaryBottom + (count3-1)); |  | 
| 4929                 } |  | 
| 4930             } |  | 
| 4931             ok &= tertiaries.isOk(); |  | 
| 4932             result.Append(UCOL_LEVELTERMINATOR); |  | 
| 4933             tertiaries.appendTo(result); |  | 
| 4934 |  | 
| 4935             if(compareQuad == 0/*qShifted == TRUE*/) { |  | 
| 4936                 if(count4 > 0) { |  | 
| 4937                     while (count4 > UCOL_BOT_COUNT4) { |  | 
| 4938                         quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); |  | 
| 4939                         count4 -= UCOL_BOT_COUNT4; |  | 
| 4940                     } |  | 
| 4941                     quads.appendByte(UCOL_COMMON_BOT4 + (count4-1)); |  | 
| 4942                 } |  | 
| 4943                 ok &= quads.isOk(); |  | 
| 4944                 result.Append(UCOL_LEVELTERMINATOR); |  | 
| 4945                 quads.appendTo(result); |  | 
| 4946             } |  | 
| 4947 |  | 
| 4948             if(compareIdent) { |  | 
| 4949                 result.Append(UCOL_LEVELTERMINATOR); |  | 
| 4950                 u_writeIdenticalLevelRun(s.string, len, result); |  | 
| 4951             } |  | 
| 4952         } |  | 
| 4953         result.Append(0); |  | 
| 4954     } |  | 
| 4955 |  | 
| 4956     /* To avoid memory leak, free the offset buffer if necessary. */ |  | 
| 4957     ucol_freeOffsetBuffer(&s); |  | 
| 4958 |  | 
| 4959     ok &= result.IsOk(); |  | 
| 4960     if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; } |  | 
| 4961 } |  | 
| 4962 |  | 
| 4963 |  | 
| 4964 U_CFUNC void U_CALLCONV /* Builds a sort key with primary, secondary and tertiary levels only. */ |  | 
| 4965 ucol_calcSortKeySimpleTertiary(const    UCollator    *coll, /* collator whose settings and tables are read */ |  | 
| 4966         const    UChar        *source, /* text to collate */ |  | 
| 4967         int32_t        sourceLength, /* passed through as len; presumably <0 means NUL-terminated - confirm */ |  | 
| 4968         SortKeyByteSink &result, /* receives all key bytes */ |  | 
| 4969         UErrorCode *status) /* in/out; nothing is done if it already holds a failure */ |  | 
| 4970 { |  | 
| 4971     U_ALIGN_CODE(16); |  | 
| 4972 |  | 
| 4973     if(U_FAILURE(*status)) { |  | 
| 4974         return; |  | 
| 4975     } |  | 
| 4976 |  | 
| 4977     SortKeyByteSink &primaries = result; /* primary bytes are written straight into the output sink */ |  | 
| 4978     SortKeyLevel secondaries; /* buffered; appended to result after the loop */ |  | 
| 4979     SortKeyLevel tertiaries; /* buffered; appended to result after the secondaries */ |  | 
| 4980 |  | 
| 4981     UnicodeString normSource; |  | 
| 4982 |  | 
| 4983     int32_t len =  sourceLength; |  | 
| 4984 |  | 
| 4985     /* If we need to normalize, we'll do it all at once at the beginning! */ |  | 
| 4986     if(coll->normalizationMode != UCOL_OFF) { |  | 
| 4987         normSource.setTo(len < 0, source, len); |  | 
| 4988         const Normalizer2 *norm2 = Normalizer2Factory::getFCDInstance(*status); /* NOTE(review): used below without a status or NULL check - confirm this is safe */ |  | 
| 4989         int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status); /* length of the leading part that passes the quick check */ |  | 
| 4990         if(qcYesLength != normSource.length()) { |  | 
| 4991             UnicodeString unnormalized = normSource.tempSubString(qcYesLength); |  | 
| 4992             normSource.truncate(qcYesLength); |  | 
| 4993             norm2->normalizeSecondAndAppend(normSource, unnormalized, *status); /* only the tail after the passing prefix is normalized */ |  | 
| 4994             source = normSource.getBuffer(); /* from here on iterate over the normalized copy */ |  | 
| 4995             len = normSource.length(); |  | 
| 4996         } |  | 
| 4997     } |  | 
| 4998     collIterate s; |  | 
| 4999     IInit_collIterate(coll, (UChar *)source, len, &s, status); |  | 
| 5000     if(U_FAILURE(*status)) { |  | 
| 5001         return; /* NOTE(review): returns without ucol_freeOffsetBuffer(&s) - confirm nothing is allocated when init fails */ |  | 
| 5002     } |  | 
| 5003     s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was norma
      lized. |  | 
| 5004 |  | 
| 5005     uint32_t order = 0; /* current collation element */ |  | 
| 5006 |  | 
| 5007     uint8_t primary1 = 0; |  | 
| 5008     uint8_t primary2 = 0; |  | 
| 5009     uint8_t secondary = 0; |  | 
| 5010     uint8_t tertiary = 0; |  | 
| 5011     uint8_t caseSwitch = coll->caseSwitch; /* collator settings cached in locals for the loop */ |  | 
| 5012     uint8_t tertiaryMask = coll->tertiaryMask; |  | 
| 5013     int8_t tertiaryAddition = coll->tertiaryAddition; |  | 
| 5014     uint8_t tertiaryTop = coll->tertiaryTop; |  | 
| 5015     uint8_t tertiaryBottom = coll->tertiaryBottom; |  | 
| 5016     uint8_t tertiaryCommon = coll->tertiaryCommon; |  | 
| 5017 |  | 
| 5018     UBool notIsContinuation = FALSE; |  | 
| 5019 |  | 
| 5020     uint32_t count2 = 0, count3 = 0; /* pending run lengths of common secondary / tertiary weights */ |  | 
| 5021     uint8_t leadPrimary = 0; /* lead byte of the open compressed primary run, 0 if none */ |  | 
| 5022 |  | 
| 5023     for(;;) { |  | 
| 5024         order = ucol_IGetNextCE(coll, &s, status); |  | 
| 5025 |  | 
| 5026         if(order == 0) { /* a zero CE contributes nothing to any level */ |  | 
| 5027             continue; |  | 
| 5028         } |  | 
| 5029 |  | 
| 5030         if(order == UCOL_NO_MORE_CES) { |  | 
| 5031             break; |  | 
| 5032         } |  | 
| 5033 |  | 
| 5034         notIsContinuation = !isContinuation(order); |  | 
| 5035 |  | 
| 5036         if(notIsContinuation) { |  | 
| 5037             tertiary = (uint8_t)((order & tertiaryMask)); |  | 
| 5038         } else { |  | 
| 5039             tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION)); |  | 
| 5040         } |  | 
| 5041 |  | 
| 5042         secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); /* order is shifted in place, one byte per step */ |  | 
| 5043         primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK); |  | 
| 5044         primary1 = (uint8_t)(order >> 8); |  | 
| 5045 |  | 
| 5046         uint8_t originalPrimary1 = primary1; /* kept unpermuted for the isCompressible() test below */ |  | 
| 5047         if (coll->leadBytePermutationTable != NULL && notIsContinuation) { |  | 
| 5048             primary1 = coll->leadBytePermutationTable[primary1]; |  | 
| 5049         } |  | 
| 5050 |  | 
| 5051         /* Note: This code assumes that the table is well built i.e. not having 
      0 bytes where they are not supposed to be. */ |  | 
| 5052         /* Usually, we'll have non-zero primary1 & primary2, except in cases of 
      a-z and friends, when primary2 will   */ |  | 
| 5053         /* be zero with non zero primary1. primary3 is different than 0 only for
       long primaries - see above.               */ |  | 
| 5054         /* regular and simple sortkey calc */ |  | 
| 5055         if(primary1 != UCOL_IGNORABLE) { |  | 
| 5056             if(notIsContinuation) { |  | 
| 5057                 if(leadPrimary == primary1) { /* same lead byte as the open run: write only the second byte */ |  | 
| 5058                     primaries.Append(primary2); |  | 
| 5059                 } else { |  | 
| 5060                     if(leadPrimary != 0) { /* close the open run with a MAX or MIN marker byte */ |  | 
| 5061                         primaries.Append((primary1 > leadPrimary) ? UCOL_BYTE_UN
      SHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN); |  | 
| 5062                     } |  | 
| 5063                     if(primary2 == UCOL_IGNORABLE) { |  | 
| 5064                         /* one byter, not compressed */ |  | 
| 5065                         primaries.Append(primary1); |  | 
| 5066                         leadPrimary = 0; |  | 
| 5067                     } else if(isCompressible(coll, originalPrimary1)) { |  | 
| 5068                         /* compress */ |  | 
| 5069                         primaries.Append(leadPrimary = primary1, primary2); /* also opens a new run on this lead byte */ |  | 
| 5070                     } else { |  | 
| 5071                         leadPrimary = 0; |  | 
| 5072                         primaries.Append(primary1, primary2); |  | 
| 5073                     } |  | 
| 5074                 } |  | 
| 5075             } else { /* we are in continuation, so we're gonna add primary to th
      e key don't care about compression */ |  | 
| 5076                 if(primary2 == UCOL_IGNORABLE) { |  | 
| 5077                     primaries.Append(primary1); |  | 
| 5078                 } else { |  | 
| 5079                     primaries.Append(primary1, primary2); |  | 
| 5080                 } |  | 
| 5081             } |  | 
| 5082         } |  | 
| 5083 |  | 
| 5084         if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */ |  | 
| 5085             /* This is compression code. */ |  | 
| 5086             if (secondary == UCOL_COMMON2 && notIsContinuation) { /* common weights are counted, not written */ |  | 
| 5087                 ++count2; |  | 
| 5088             } else { |  | 
| 5089                 if (count2 > 0) { /* flush the pending run before writing this different weight */ |  | 
| 5090                     if (secondary > UCOL_COMMON2) { // not necessary for 4th lev
      el. |  | 
| 5091                         while (count2 > UCOL_TOP_COUNT2) { |  | 
| 5092                             secondaries.appendByte(UCOL_COMMON_TOP2 - UCOL_TOP_C
      OUNT2); |  | 
| 5093                             count2 -= (uint32_t)UCOL_TOP_COUNT2; |  | 
| 5094                         } |  | 
| 5095                         secondaries.appendByte(UCOL_COMMON_TOP2 - (count2-1)); |  | 
| 5096                     } else { |  | 
| 5097                         while (count2 > UCOL_BOT_COUNT2) { |  | 
| 5098                             secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_C
      OUNT2); |  | 
| 5099                             count2 -= (uint32_t)UCOL_BOT_COUNT2; |  | 
| 5100                         } |  | 
| 5101                         secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1)); |  | 
| 5102                     } |  | 
| 5103                     count2 = 0; |  | 
| 5104                 } |  | 
| 5105                 secondaries.appendByte(secondary); |  | 
| 5106             } |  | 
| 5107         } |  | 
| 5108 |  | 
| 5109         if(notIsContinuation) { |  | 
| 5110             tertiary ^= caseSwitch; /* applied to non-continuation CEs only */ |  | 
| 5111         } |  | 
| 5112 |  | 
| 5113         if(tertiary > 0) { |  | 
| 5114             /* This is compression code. */ |  | 
| 5115             /* sequence size check is included in the if clause */ |  | 
| 5116             if (tertiary == tertiaryCommon && notIsContinuation) { /* common weights are counted, not written */ |  | 
| 5117                 ++count3; |  | 
| 5118             } else { |  | 
| 5119                 if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_N
      ORMAL) { |  | 
| 5120                     tertiary += tertiaryAddition; |  | 
| 5121                 } else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_
      COMMON3_UPPERFIRST) { |  | 
| 5122                     tertiary -= tertiaryAddition; |  | 
| 5123                 } |  | 
| 5124                 if (count3 > 0) { /* flush the pending run; direction depends on the adjusted tertiary */ |  | 
| 5125                     if ((tertiary > tertiaryCommon)) { |  | 
| 5126                         while (count3 > coll->tertiaryTopCount) { |  | 
| 5127                             tertiaries.appendByte(tertiaryTop - coll->tertiaryTo
      pCount); |  | 
| 5128                             count3 -= (uint32_t)coll->tertiaryTopCount; |  | 
| 5129                         } |  | 
| 5130                         tertiaries.appendByte(tertiaryTop - (count3-1)); |  | 
| 5131                     } else { |  | 
| 5132                         while (count3 > coll->tertiaryBottomCount) { |  | 
| 5133                             tertiaries.appendByte(tertiaryBottom + coll->tertiar
      yBottomCount); |  | 
| 5134                             count3 -= (uint32_t)coll->tertiaryBottomCount; |  | 
| 5135                         } |  | 
| 5136                         tertiaries.appendByte(tertiaryBottom + (count3-1)); |  | 
| 5137                     } |  | 
| 5138                     count3 = 0; |  | 
| 5139                 } |  | 
| 5140                 tertiaries.appendByte(tertiary); |  | 
| 5141             } |  | 
| 5142         } |  | 
| 5143     } |  | 
| 5144 |  | 
| 5145     UBool ok = TRUE; /* cleared when a level buffer or the sink reports not-OK */ |  | 
| 5146     if(U_SUCCESS(*status)) { |  | 
| 5147         /* we have done all the CE's, now let's put them together to form a key 
      */ |  | 
| 5148         if (count2 > 0) { /* trailing run of common secondaries, written with the bottom encoding */ |  | 
| 5149             while (count2 > UCOL_BOT_COUNT2) { |  | 
| 5150                 secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2); |  | 
| 5151                 count2 -= (uint32_t)UCOL_BOT_COUNT2; |  | 
| 5152             } |  | 
| 5153             secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1)); |  | 
| 5154         } |  | 
| 5155         ok &= secondaries.isOk(); |  | 
| 5156         result.Append(UCOL_LEVELTERMINATOR); /* separates primary from secondary level */ |  | 
| 5157         secondaries.appendTo(result); |  | 
| 5158 |  | 
| 5159         if (count3 > 0) { /* trailing run of common tertiaries */ |  | 
| 5160             if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) { |  | 
| 5161                 while (count3 >= coll->tertiaryTopCount) { |  | 
| 5162                     tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount); |  | 
| 5163                     count3 -= (uint32_t)coll->tertiaryTopCount; |  | 
| 5164                 } |  | 
| 5165                 tertiaries.appendByte(tertiaryTop - count3); |  | 
| 5166             } else { |  | 
| 5167                 while (count3 > coll->tertiaryBottomCount) { |  | 
| 5168                     tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomC
      ount); |  | 
| 5169                     count3 -= (uint32_t)coll->tertiaryBottomCount; |  | 
| 5170                 } |  | 
| 5171                 tertiaries.appendByte(tertiaryBottom + (count3-1)); |  | 
| 5172             } |  | 
| 5173         } |  | 
| 5174         ok &= tertiaries.isOk(); |  | 
| 5175         result.Append(UCOL_LEVELTERMINATOR); /* separates secondary from tertiary level */ |  | 
| 5176         tertiaries.appendTo(result); |  | 
| 5177 |  | 
| 5178         result.Append(0); /* final byte of the key */ |  | 
| 5179     } |  | 
| 5180 |  | 
| 5181     /* To avoid memory leak, free the offset buffer if necessary. */ |  | 
| 5182     ucol_freeOffsetBuffer(&s); |  | 
| 5183 |  | 
| 5184     ok &= result.IsOk(); |  | 
| 5185     if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; } |  | 
| 5186 } |  | 
| 5187 |  | 
| 5188 static inline /* Decides whether CE is treated as shifted and maintains the caller's *wasShifted flag. */ |  | 
| 5189 UBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) { /* LVT == 0 disables the first clause; NOTE(review): LVT presumably is the variable-top value - confirm at callers */ |  | 
| 5190     UBool notIsContinuation = !isContinuation(CE); |  | 
| 5191     uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF); /* top byte of the CE */ |  | 
| 5192     if((LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0) /* non-continuation, non-zero primary, upper 16 bits at or below LVT */ |  | 
| 5193                || (!notIsContinuation && *wasShifted))) /* continuation seen while the flag is set */ |  | 
| 5194         || (*wasShifted && primary1 == 0)) /* amendment to the UCA says that pri
      mary ignorables */ |  | 
| 5195     { |  | 
| 5196         // The stuff below should probably be in the sortkey code... maybe not..
      . |  | 
| 5197         if(primary1 != 0) { /* if we were shifted and we got an ignorable code p
      oint */ |  | 
| 5198             /* we should just completely ignore it */ |  | 
| 5199             *wasShifted = TRUE; /* only set for a non-zero primary; otherwise the flag is left as it was */ |  | 
| 5200             //continue; |  | 
| 5201         } |  | 
| 5202         //*wasShifted = TRUE; |  | 
| 5203         return TRUE; |  | 
| 5204     } else { |  | 
| 5205         *wasShifted = FALSE; /* any non-shifted CE clears the flag */ |  | 
| 5206         return FALSE; |  | 
| 5207     } |  | 
| 5208 } |  | 
| 5209 static inline /* Writes one terminator byte at dest[i] and advances i; no bounds check is done here. */ |  | 
| 5210 void terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *des
      t) { |  | 
| 5211     if(level < maxLevel) { /* levels below maxLevel end with the level separator */ |  | 
| 5212         dest[i++] = UCOL_LEVELTERMINATOR; |  | 
| 5213     } else { /* level >= maxLevel: a zero byte is written instead */ |  | 
| 5214         dest[i++] = 0; |  | 
| 5215     } |  | 
| 5216 } |  | 
| 5217 |  | 
| 5218 /** enumeration of level identifiers for partial sort key generation */ |  | 
| 5219 enum { /* values 0..7 fit the three-bit UCOL_PSK_LEVEL_MASK field */ |  | 
| 5220   UCOL_PSK_PRIMARY = 0, |  | 
| 5221     UCOL_PSK_SECONDARY = 1, |  | 
| 5222     UCOL_PSK_CASE = 2, |  | 
| 5223     UCOL_PSK_TERTIARY = 3, |  | 
| 5224     UCOL_PSK_QUATERNARY = 4, |  | 
| 5225     UCOL_PSK_QUIN = 5,      /** This is an extra level, not used - but we have t
      hree bits to blow */ |  | 
| 5226     UCOL_PSK_IDENTICAL = 6, |  | 
| 5227     UCOL_PSK_NULL = 7,      /** level for the end of sort key. Will just produce
       zeros */ |  | 
| 5228     UCOL_PSK_LIMIT /* implicitly 8: one past the last level identifier */ |  | 
| 5229 }; |  | 
| 5230 |  | 
| 5231 /** collation state enum. *_SHIFT value is how much to shift right |  | 
| 5232  *  to get the state piece to the right. *_MASK value should be |  | 
| 5233  *  ANDed with the shifted state. This data is stored in state[1] |  | 
| 5234  *  field. |  | 
| 5235  */ |  | 
| 5236 enum { |  | 
| 5237     UCOL_PSK_LEVEL_SHIFT = 0,      /** level identifier. Stores an enum value
       from above */ |  | 
| 5238     UCOL_PSK_LEVEL_MASK = 7,       /** three bits */ |  | 
| 5239     UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3, /** number of bytes of primary
       or quaternary already written */ |  | 
| 5240     UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1, |  | 
| 5241     /** can be only 0 or 1, since we get up to two bytes from primary or quatern
      ary |  | 
| 5242      *  This field is also used to denote that the French secondary level is fin
      ished |  | 
| 5243      */ |  | 
| 5244     UCOL_PSK_WAS_SHIFTED_SHIFT = 4,/** was the last value shifted */ |  | 
| 5245     UCOL_PSK_WAS_SHIFTED_MASK = 1, /** can be 0 or 1 (Boolean) */ |  | 
| 5246     UCOL_PSK_USED_FRENCH_SHIFT = 5,/** how many French bytes have we already wri
      tten */ |  | 
| 5247     UCOL_PSK_USED_FRENCH_MASK = 3, /** up to 4 bytes. See comment just below */ |  | 
| 5248     /** When we do French we need to reverse secondary values. However, continua
      tions |  | 
| 5249      *  need to stay the same. So if you had abc1c2c3de, you need to have edc1c2
      c3ba |  | 
| 5250      */ |  | 
| 5251     UCOL_PSK_BOCSU_BYTES_SHIFT = 7, /* two-bit field at bits 7 and 8 */ |  | 
| 5252     UCOL_PSK_BOCSU_BYTES_MASK = 3, |  | 
| 5253     UCOL_PSK_CONSUMED_CES_SHIFT = 9, |  | 
| 5254     UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF /* 19 bits wide, i.e. bits 9..27; NOTE(review): the state[1] layout comment says 9..31 - confirm which is intended */ |  | 
| 5255 }; |  | 
| 5256 |  | 
| 5257 // macro calculating the number of expansion CEs available as (s).CEpos - (s).toReturn; NOTE(review): the expansion is not wrapped in parentheses, so uses inside larger expressions need their own |  | 
| 5258 #define uprv_numAvailableExpCEs(s) (s).CEpos - (s).toReturn |  | 
| 5259 |  | 
| 5260 |  | 
| 5261 /** main sortkey part procedure. On the first call, |  | 
| 5262  *  you should pass in a collator, an iterator, empty state |  | 
| 5263  *  state[0] == state[1] == 0, a buffer to hold results |  | 
| 5264  *  number of bytes you need and an error code pointer. |  | 
| 5265  *  Make sure your buffer is big enough to hold the wanted |  | 
| 5266  *  number of sortkey bytes. I don't check. |  | 
| 5267  *  The only meaningful status you can get back is |  | 
| 5268  *  U_BUFFER_OVERFLOW_ERROR, which basically means that you |  | 
| 5269  *  have been dealt a raw deal and that you probably won't |  | 
| 5270  *  be able to use partial sortkey generation for this |  | 
| 5271  *  particular combination of string and collator. This |  | 
| 5272  *  is highly unlikely, but you should still check the error code. |  | 
| 5273  *  Any other status means that you're not in a sane situation |  | 
| 5274  *  anymore. After the first call, preserve state values and |  | 
| 5275  *  use them on subsequent calls to obtain more bytes of a sortkey. |  | 
| 5276  *  Use until the number of bytes written is smaller than the requested |  | 
| 5277  *  number of bytes. Generated sortkey is not compatible with the |  | 
| 5278  *  one generated by ucol_getSortKey, as we don't do any compression. |  | 
| 5279  *  However, levels are still terminated by a 1 (one) and the sortkey |  | 
| 5280  *  is terminated by a 0 (zero). Identical level is the same as in the |  | 
| 5281  *  regular sortkey - internal bocu-1 implementation is used. |  | 
| 5282  *  For the curious, although you cannot do much about this, here is |  | 
| 5283  *  the structure of state words. |  | 
| 5284  *  state[0] - iterator state. Depends on the iterator implementation, |  | 
| 5285  *             but allows the iterator to continue where it stopped in |  | 
| 5286  *             the last iteration. |  | 
| 5287  *  state[1] - collation processing state. Here is the distribution |  | 
| 5288  *             of the bits: |  | 
| 5289  *   0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary |  | 
| 5290  *             quaternary, quin (we don't use this one), identical and |  | 
| 5291  *             null (producing only zeroes - first one to terminate the |  | 
| 5292  *             sortkey and subsequent to fill the buffer). |  | 
| 5293  *   3       - byte count. Number of bytes written on the primary level. |  | 
| 5294  *   4       - was shifted. Whether the previous iteration finished in the |  | 
| 5295  *             shifted state. |  | 
| 5296  *   5, 6    - French continuation bytes written. See the comment in the enum |  | 
| 5297  *   7,8     - Bocsu bytes used. Number of bytes from a bocu sequence on |  | 
| 5298  *             the identical level. |  | 
| 5299  *   9..31   - CEs consumed. Number of getCE or next32 operations performed |  | 
| 5300  *             since the last successful update of the iterator state. |  | 
| 5301  */ |  | 
| 5302 U_CAPI int32_t U_EXPORT2 | 209 U_CAPI int32_t U_EXPORT2 | 
| 5303 ucol_nextSortKeyPart(const UCollator *coll, | 210 ucol_nextSortKeyPart(const UCollator *coll, | 
| 5304                      UCharIterator *iter, | 211                      UCharIterator *iter, | 
| 5305                      uint32_t state[2], | 212                      uint32_t state[2], | 
| 5306                      uint8_t *dest, int32_t count, | 213                      uint8_t *dest, int32_t count, | 
| 5307                      UErrorCode *status) | 214                      UErrorCode *status) | 
| 5308 { | 215 { | 
| 5309     /* error checking */ | 216     /* error checking */ | 
| 5310     if(status==NULL || U_FAILURE(*status)) { | 217     if(status==NULL || U_FAILURE(*status)) { | 
| 5311         return 0; | 218         return 0; | 
| 5312     } | 219     } | 
| 5313     UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); | 220     UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); | 
| 5314     if( coll==NULL || iter==NULL || |  | 
| 5315         state==NULL || |  | 
| 5316         count<0 || (count>0 && dest==NULL) |  | 
| 5317     ) { |  | 
| 5318         *status=U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 5319         UTRACE_EXIT_STATUS(status); |  | 
| 5320         return 0; |  | 
| 5321     } |  | 
| 5322 |  | 
| 5323     UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=
      %d", | 221     UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=
      %d", | 
| 5324                   coll, iter, state[0], state[1], dest, count); | 222                   coll, iter, state[0], state[1], dest, count); | 
| 5325 | 223 | 
| 5326     if(count==0) { | 224     int32_t i = Collator::fromUCollator(coll)-> | 
| 5327         /* nothing to do */ | 225             internalNextSortKeyPart(iter, state, dest, count, *status); | 
| 5328         UTRACE_EXIT_VALUE(0); | 226 | 
| 5329         return 0; |  | 
| 5330     } |  | 
| 5331     /** Setting up situation according to the state we got from the previous ite
      ration */ |  | 
| 5332     // The state of the iterator from the previous invocation |  | 
| 5333     uint32_t iterState = state[0]; |  | 
| 5334     // Has the last iteration ended in the shifted state |  | 
| 5335     UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_
      SHIFTED_MASK)?TRUE:FALSE; |  | 
| 5336     // What is the current level of the sortkey? |  | 
| 5337     int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK; |  | 
| 5338     // Have we written only one byte from a two byte primary in the previous ite
      ration? |  | 
| 5339     // Also on secondary level - have we finished with the French secondary? |  | 
| 5340     int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_D
      ONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK; |  | 
| 5341     // number of bytes in the continuation buffer for French |  | 
| 5342     int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USE
      D_FRENCH_MASK; |  | 
| 5343     // Number of bytes already written from a bocsu sequence. Since |  | 
| 5344     // the longest bocsu sequence is 4 long, this can be up to 3. |  | 
| 5345     int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK
      _BOCSU_BYTES_MASK; |  | 
| 5346     // Number of elements that need to be consumed in this iteration because |  | 
| 5347     // the iterator returned UITER_NO_STATE at the end of the last iteration, |  | 
| 5348     // so we had to save the last valid state. |  | 
| 5349     int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED
      _CES_MASK; |  | 
| 5350 |  | 
| 5351     /** values that depend on the collator attributes */ |  | 
| 5352     // strength of the collator. |  | 
| 5353     int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status); |  | 
| 5354     // maximal level of the partial sortkey. Need to take whether case level is 
      done |  | 
| 5355     int32_t maxLevel = 0; |  | 
| 5356     if(strength < UCOL_TERTIARY) { |  | 
| 5357         if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) { |  | 
| 5358             maxLevel = UCOL_PSK_CASE; |  | 
| 5359         } else { |  | 
| 5360             maxLevel = strength; |  | 
| 5361         } |  | 
| 5362     } else { |  | 
| 5363         if(strength == UCOL_TERTIARY) { |  | 
| 5364             maxLevel = UCOL_PSK_TERTIARY; |  | 
| 5365         } else if(strength == UCOL_QUATERNARY) { |  | 
| 5366             maxLevel = UCOL_PSK_QUATERNARY; |  | 
| 5367         } else { // identical |  | 
| 5368             maxLevel = UCOL_IDENTICAL; |  | 
| 5369         } |  | 
| 5370     } |  | 
| 5371     // value for the quaternary level if Hiragana is encountered. Used for JIS X
       4061 collation |  | 
| 5372     uint8_t UCOL_HIRAGANA_QUAD = |  | 
| 5373       (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON
      )?0xFE:0xFF; |  | 
| 5374     // Boundary value that decides whether a CE is shifted or not |  | 
| 5375     uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopV
      alue<<16):0; |  | 
| 5376     // Are we doing French collation? |  | 
| 5377     UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) 
      == UCOL_ON); |  | 
| 5378 |  | 
| 5379     /** initializing the collation state */ |  | 
| 5380     UBool notIsContinuation = FALSE; |  | 
| 5381     uint32_t CE = UCOL_NO_MORE_CES; |  | 
| 5382 |  | 
| 5383     collIterate s; |  | 
| 5384     IInit_collIterate(coll, NULL, -1, &s, status); |  | 
| 5385     if(U_FAILURE(*status)) { |  | 
| 5386         UTRACE_EXIT_STATUS(*status); |  | 
| 5387         return 0; |  | 
| 5388     } |  | 
| 5389     s.iterator = iter; |  | 
| 5390     s.flags |= UCOL_USE_ITERATOR; |  | 
| 5391     // This variable tells us whether we have produced some other levels in this
       iteration |  | 
| 5392     // before we moved to the identical level. In that case, we need to switch t
      he |  | 
| 5393     // type of the iterator. |  | 
| 5394     UBool doingIdenticalFromStart = FALSE; |  | 
| 5395     // Normalizing iterator |  | 
| 5396     // The division for the array length may truncate the array size to |  | 
| 5397     // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high |  | 
| 5398     // for all platforms anyway. |  | 
| 5399     UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; |  | 
| 5400     UNormIterator *normIter = NULL; |  | 
| 5401     // If the normalization is turned on for the collator and we are below ident
      ical level |  | 
| 5402     // we will use a FCD normalizing iterator |  | 
| 5403     if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && le
      vel < UCOL_PSK_IDENTICAL) { |  | 
| 5404         normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); |  | 
| 5405         s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status); |  | 
| 5406         s.flags &= ~UCOL_ITER_NORM; |  | 
| 5407         if(U_FAILURE(*status)) { |  | 
| 5408             UTRACE_EXIT_STATUS(*status); |  | 
| 5409             return 0; |  | 
| 5410         } |  | 
| 5411     } else if(level == UCOL_PSK_IDENTICAL) { |  | 
| 5412         // for identical level, we need a NFD iterator. We need to instantiate i
      t here, since we |  | 
| 5413         // will be updating the state - and this cannot be done on an ordinary i
      terator. |  | 
| 5414         normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status); |  | 
| 5415         s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); |  | 
| 5416         s.flags &= ~UCOL_ITER_NORM; |  | 
| 5417         if(U_FAILURE(*status)) { |  | 
| 5418             UTRACE_EXIT_STATUS(*status); |  | 
| 5419             return 0; |  | 
| 5420         } |  | 
| 5421         doingIdenticalFromStart = TRUE; |  | 
| 5422     } |  | 
| 5423 |  | 
| 5424     // This is the tentative new state of the iterator. The problem |  | 
| 5425     // is that the iterator might return an undefined state, in |  | 
| 5426     // which case we should save the last valid state and increase |  | 
| 5427     // the iterator skip value. |  | 
| 5428     uint32_t newState = 0; |  | 
| 5429 |  | 
| 5430     // First, we set the iterator to the last valid position |  | 
| 5431     // from the last iteration. This was saved in state[0]. |  | 
| 5432     if(iterState == 0) { |  | 
| 5433         /* initial state */ |  | 
| 5434         if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone)
       { |  | 
| 5435             s.iterator->move(s.iterator, 0, UITER_LIMIT); |  | 
| 5436         } else { |  | 
| 5437             s.iterator->move(s.iterator, 0, UITER_START); |  | 
| 5438         } |  | 
| 5439     } else { |  | 
| 5440         /* reset to previous state */ |  | 
| 5441         s.iterator->setState(s.iterator, iterState, status); |  | 
| 5442         if(U_FAILURE(*status)) { |  | 
| 5443             UTRACE_EXIT_STATUS(*status); |  | 
| 5444             return 0; |  | 
| 5445         } |  | 
| 5446     } |  | 
| 5447 |  | 
| 5448 |  | 
| 5449 |  | 
| 5450     // This variable tells us whether we can attempt to update the state |  | 
| 5451     // of iterator. Situations where we don't want to update iterator state |  | 
| 5452     // are the existence of expansion CEs that are not yet processed, and |  | 
| 5453     // finishing the case level without enough space in the buffer to insert |  | 
| 5454     // a level terminator. |  | 
| 5455     UBool canUpdateState = TRUE; |  | 
| 5456 |  | 
| 5457     // Consume all the CEs that were consumed at the end of the previous |  | 
| 5458     // iteration without updating the iterator state. On identical level, |  | 
| 5459     // consume the code points. |  | 
| 5460     int32_t counter = cces; |  | 
| 5461     if(level < UCOL_PSK_IDENTICAL) { |  | 
| 5462         while(counter-->0) { |  | 
| 5463             // If we're doing French and we are on the secondary level, |  | 
| 5464             // we go backwards. |  | 
| 5465             if(level == UCOL_PSK_SECONDARY && doingFrench) { |  | 
| 5466                 CE = ucol_IGetPrevCE(coll, &s, status); |  | 
| 5467             } else { |  | 
| 5468                 CE = ucol_IGetNextCE(coll, &s, status); |  | 
| 5469             } |  | 
| 5470             if(CE==UCOL_NO_MORE_CES) { |  | 
| 5471                 /* should not happen */ |  | 
| 5472                 *status=U_INTERNAL_PROGRAM_ERROR; |  | 
| 5473                 UTRACE_EXIT_STATUS(*status); |  | 
| 5474                 return 0; |  | 
| 5475             } |  | 
| 5476             if(uprv_numAvailableExpCEs(s)) { |  | 
| 5477                 canUpdateState = FALSE; |  | 
| 5478             } |  | 
| 5479         } |  | 
| 5480     } else { |  | 
| 5481         while(counter-->0) { |  | 
| 5482             uiter_next32(s.iterator); |  | 
| 5483         } |  | 
| 5484     } |  | 
| 5485 |  | 
| 5486     // French secondary needs to know whether the iterator state of zero came fr
      om previous level OR |  | 
| 5487     // from a new invocation... |  | 
| 5488     UBool wasDoingPrimary = FALSE; |  | 
| 5489     // destination buffer byte counter. When this guy |  | 
| 5490     // gets to count, we're done with the iteration |  | 
| 5491     int32_t i = 0; |  | 
| 5492     // used to count the zero bytes written after we |  | 
| 5493     // have finished with the sort key |  | 
| 5494     int32_t j = 0; |  | 
| 5495 |  | 
| 5496 |  | 
| 5497     // Hm.... I think we're ready to plunge in. Basic story is as following: |  | 
| 5498     // we have a fall through case based on level. This is used for initial |  | 
| 5499     // positioning on iteration start. Every level processor contains a |  | 
| 5500     // for(;;) which will be broken when we exhaust all the CEs. Other |  | 
| 5501     // way to exit is a goto saveState, which happens when we have filled |  | 
| 5502     // out our buffer. |  | 
| 5503     switch(level) { |  | 
| 5504     case UCOL_PSK_PRIMARY: |  | 
| 5505         wasDoingPrimary = TRUE; |  | 
| 5506         for(;;) { |  | 
| 5507             if(i==count) { |  | 
| 5508                 goto saveState; |  | 
| 5509             } |  | 
| 5510             // We should save the state only if we |  | 
| 5511             // are sure that we are done with the |  | 
| 5512             // previous iterator state |  | 
| 5513             if(canUpdateState && byteCountOrFrenchDone == 0) { |  | 
| 5514                 newState = s.iterator->getState(s.iterator); |  | 
| 5515                 if(newState != UITER_NO_STATE) { |  | 
| 5516                     iterState = newState; |  | 
| 5517                     cces = 0; |  | 
| 5518                 } |  | 
| 5519             } |  | 
| 5520             CE = ucol_IGetNextCE(coll, &s, status); |  | 
| 5521             cces++; |  | 
| 5522             if(CE==UCOL_NO_MORE_CES) { |  | 
| 5523                 // Add the level separator |  | 
| 5524                 terminatePSKLevel(level, maxLevel, i, dest); |  | 
| 5525                 byteCountOrFrenchDone=0; |  | 
| 5526                 // Restart the iteration and move to the |  | 
| 5527                 // second level |  | 
| 5528                 s.iterator->move(s.iterator, 0, UITER_START); |  | 
| 5529                 cces = 0; |  | 
| 5530                 level = UCOL_PSK_SECONDARY; |  | 
| 5531                 break; |  | 
| 5532             } |  | 
| 5533             if(!isContinuation(CE)){ |  | 
| 5534                 if(coll->leadBytePermutationTable != NULL){ |  | 
| 5535                     CE = (coll->leadBytePermutationTable[CE>>24] << 24) | (CE & 
      0x00FFFFFF); |  | 
| 5536                 } |  | 
| 5537             } |  | 
| 5538             if(!isShiftedCE(CE, LVT, &wasShifted)) { |  | 
| 5539                 CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */ |  | 
| 5540                 if(CE != 0) { |  | 
| 5541                     if(byteCountOrFrenchDone == 0) { |  | 
| 5542                         // get the second byte of primary |  | 
| 5543                         dest[i++]=(uint8_t)(CE >> 8); |  | 
| 5544                     } else { |  | 
| 5545                         byteCountOrFrenchDone = 0; |  | 
| 5546                     } |  | 
| 5547                     if((CE &=0xff)!=0) { |  | 
| 5548                         if(i==count) { |  | 
| 5549                             /* overflow */ |  | 
| 5550                             byteCountOrFrenchDone = 1; |  | 
| 5551                             cces--; |  | 
| 5552                             goto saveState; |  | 
| 5553                         } |  | 
| 5554                         dest[i++]=(uint8_t)CE; |  | 
| 5555                     } |  | 
| 5556                 } |  | 
| 5557             } |  | 
| 5558             if(uprv_numAvailableExpCEs(s)) { |  | 
| 5559                 canUpdateState = FALSE; |  | 
| 5560             } else { |  | 
| 5561                 canUpdateState = TRUE; |  | 
| 5562             } |  | 
| 5563         } |  | 
| 5564         /* fall through to next level */ |  | 
| 5565     case UCOL_PSK_SECONDARY: |  | 
| 5566         if(strength >= UCOL_SECONDARY) { |  | 
| 5567             if(!doingFrench) { |  | 
| 5568                 for(;;) { |  | 
| 5569                     if(i == count) { |  | 
| 5570                         goto saveState; |  | 
| 5571                     } |  | 
| 5572                     // We should save the state only if we |  | 
| 5573                     // are sure that we are done with the |  | 
| 5574                     // previous iterator state |  | 
| 5575                     if(canUpdateState) { |  | 
| 5576                         newState = s.iterator->getState(s.iterator); |  | 
| 5577                         if(newState != UITER_NO_STATE) { |  | 
| 5578                             iterState = newState; |  | 
| 5579                             cces = 0; |  | 
| 5580                         } |  | 
| 5581                     } |  | 
| 5582                     CE = ucol_IGetNextCE(coll, &s, status); |  | 
| 5583                     cces++; |  | 
| 5584                     if(CE==UCOL_NO_MORE_CES) { |  | 
| 5585                         // Add the level separator |  | 
| 5586                         terminatePSKLevel(level, maxLevel, i, dest); |  | 
| 5587                         byteCountOrFrenchDone = 0; |  | 
| 5588                         // Restart the iteration and move to the |  | 
| 5589                         // next level |  | 
| 5590                         s.iterator->move(s.iterator, 0, UITER_START); |  | 
| 5591                         cces = 0; |  | 
| 5592                         level = UCOL_PSK_CASE; |  | 
| 5593                         break; |  | 
| 5594                     } |  | 
| 5595                     if(!isShiftedCE(CE, LVT, &wasShifted)) { |  | 
| 5596                         CE >>= 8; /* get secondary */ |  | 
| 5597                         if(CE != 0) { |  | 
| 5598                             dest[i++]=(uint8_t)CE; |  | 
| 5599                         } |  | 
| 5600                     } |  | 
| 5601                     if(uprv_numAvailableExpCEs(s)) { |  | 
| 5602                         canUpdateState = FALSE; |  | 
| 5603                     } else { |  | 
| 5604                         canUpdateState = TRUE; |  | 
| 5605                     } |  | 
| 5606                 } |  | 
| 5607             } else { // French secondary processing |  | 
| 5608                 uint8_t frenchBuff[UCOL_MAX_BUFFER]; |  | 
| 5609                 int32_t frenchIndex = 0; |  | 
| 5610                 // Here we are going backwards. |  | 
| 5611                 // If the iterator is at the beginning, it should be |  | 
| 5612                 // moved to end. |  | 
| 5613                 if(wasDoingPrimary) { |  | 
| 5614                     s.iterator->move(s.iterator, 0, UITER_LIMIT); |  | 
| 5615                     cces = 0; |  | 
| 5616                 } |  | 
| 5617                 for(;;) { |  | 
| 5618                     if(i == count) { |  | 
| 5619                         goto saveState; |  | 
| 5620                     } |  | 
| 5621                     if(canUpdateState) { |  | 
| 5622                         newState = s.iterator->getState(s.iterator); |  | 
| 5623                         if(newState != UITER_NO_STATE) { |  | 
| 5624                             iterState = newState; |  | 
| 5625                             cces = 0; |  | 
| 5626                         } |  | 
| 5627                     } |  | 
| 5628                     CE = ucol_IGetPrevCE(coll, &s, status); |  | 
| 5629                     cces++; |  | 
| 5630                     if(CE==UCOL_NO_MORE_CES) { |  | 
| 5631                         // Add the level separator |  | 
| 5632                         terminatePSKLevel(level, maxLevel, i, dest); |  | 
| 5633                         byteCountOrFrenchDone = 0; |  | 
| 5634                         // Restart the iteration and move to the next level |  | 
| 5635                         s.iterator->move(s.iterator, 0, UITER_START); |  | 
| 5636                         level = UCOL_PSK_CASE; |  | 
| 5637                         break; |  | 
| 5638                     } |  | 
| 5639                     if(isContinuation(CE)) { // if it's a continuation, we want 
      to save it and |  | 
| 5640                         // reverse when we get a first non-continuation CE. |  | 
| 5641                         CE >>= 8; |  | 
| 5642                         frenchBuff[frenchIndex++] = (uint8_t)CE; |  | 
| 5643                     } else if(!isShiftedCE(CE, LVT, &wasShifted)) { |  | 
| 5644                         CE >>= 8; /* get secondary */ |  | 
| 5645                         if(!frenchIndex) { |  | 
| 5646                             if(CE != 0) { |  | 
| 5647                                 dest[i++]=(uint8_t)CE; |  | 
| 5648                             } |  | 
| 5649                         } else { |  | 
| 5650                             frenchBuff[frenchIndex++] = (uint8_t)CE; |  | 
| 5651                             frenchIndex -= usedFrench; |  | 
| 5652                             usedFrench = 0; |  | 
| 5653                             while(i < count && frenchIndex) { |  | 
| 5654                                 dest[i++] = frenchBuff[--frenchIndex]; |  | 
| 5655                                 usedFrench++; |  | 
| 5656                             } |  | 
| 5657                         } |  | 
| 5658                     } |  | 
| 5659                     if(uprv_numAvailableExpCEs(s)) { |  | 
| 5660                         canUpdateState = FALSE; |  | 
| 5661                     } else { |  | 
| 5662                         canUpdateState = TRUE; |  | 
| 5663                     } |  | 
| 5664                 } |  | 
| 5665             } |  | 
| 5666         } else { |  | 
| 5667             level = UCOL_PSK_CASE; |  | 
| 5668         } |  | 
| 5669         /* fall through to next level */ |  | 
| 5670     case UCOL_PSK_CASE: |  | 
| 5671         if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) { |  | 
| 5672             uint32_t caseShift = UCOL_CASE_SHIFT_START; |  | 
| 5673             uint8_t caseByte = UCOL_CASE_BYTE_START; |  | 
| 5674             uint8_t caseBits = 0; |  | 
| 5675 |  | 
| 5676             for(;;) { |  | 
| 5677                 U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START); |  | 
| 5678                 if(i == count) { |  | 
| 5679                     goto saveState; |  | 
| 5680                 } |  | 
| 5681                 // We should save the state only if we |  | 
| 5682                 // are sure that we are done with the |  | 
| 5683                 // previous iterator state |  | 
| 5684                 if(canUpdateState) { |  | 
| 5685                     newState = s.iterator->getState(s.iterator); |  | 
| 5686                     if(newState != UITER_NO_STATE) { |  | 
| 5687                         iterState = newState; |  | 
| 5688                         cces = 0; |  | 
| 5689                     } |  | 
| 5690                 } |  | 
| 5691                 CE = ucol_IGetNextCE(coll, &s, status); |  | 
| 5692                 cces++; |  | 
| 5693                 if(CE==UCOL_NO_MORE_CES) { |  | 
| 5694                     // On the case level we might have an unfinished |  | 
| 5695                     // case byte. Add one if it's started. |  | 
| 5696                     if(caseShift != UCOL_CASE_SHIFT_START) { |  | 
| 5697                         dest[i++] = caseByte; |  | 
| 5698                     } |  | 
| 5699                     cces = 0; |  | 
| 5700                     // We have finished processing CEs on this level. |  | 
| 5701                     // However, we don't know if we have enough space |  | 
| 5702                     // to add a case level terminator. |  | 
| 5703                     if(i < count) { |  | 
| 5704                         // Add the level separator |  | 
| 5705                         terminatePSKLevel(level, maxLevel, i, dest); |  | 
| 5706                         // Restart the iteration and move to the |  | 
| 5707                         // next level |  | 
| 5708                         s.iterator->move(s.iterator, 0, UITER_START); |  | 
| 5709                         level = UCOL_PSK_TERTIARY; |  | 
| 5710                     } else { |  | 
| 5711                         canUpdateState = FALSE; |  | 
| 5712                     } |  | 
| 5713                     break; |  | 
| 5714                 } |  | 
| 5715 |  | 
| 5716                 if(!isShiftedCE(CE, LVT, &wasShifted)) { |  | 
| 5717                     if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || s
      trength > UCOL_PRIMARY)) { |  | 
| 5718                         // do the case level if we need to do it. We don't want 
      to calculate |  | 
| 5719                         // case level for primary ignorables if we have only pri
      mary strength and case level |  | 
| 5720                         // otherwise we would break well formedness of CEs |  | 
| 5721                         CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK); |  | 
| 5722                         caseBits = (uint8_t)(CE & 0xC0); |  | 
| 5723                         // this copies the case level logic from the |  | 
| 5724                         // sort key generation code |  | 
| 5725                         if(CE != 0) { |  | 
| 5726                             if (caseShift == 0) { |  | 
| 5727                                 dest[i++] = caseByte; |  | 
| 5728                                 caseShift = UCOL_CASE_SHIFT_START; |  | 
| 5729                                 caseByte = UCOL_CASE_BYTE_START; |  | 
| 5730                             } |  | 
| 5731                             if(coll->caseFirst == UCOL_UPPER_FIRST) { |  | 
| 5732                                 if((caseBits & 0xC0) == 0) { |  | 
| 5733                                     caseByte |= 1 << (--caseShift); |  | 
| 5734                                 } else { |  | 
| 5735                                     caseByte |= 0 << (--caseShift); |  | 
| 5736                                     /* second bit */ |  | 
| 5737                                     if(caseShift == 0) { |  | 
| 5738                                         dest[i++] = caseByte; |  | 
| 5739                                         caseShift = UCOL_CASE_SHIFT_START; |  | 
| 5740                                         caseByte = UCOL_CASE_BYTE_START; |  | 
| 5741                                     } |  | 
| 5742                                     caseByte |= ((caseBits>>6)&1) << (--caseShif
      t); |  | 
| 5743                                 } |  | 
| 5744                             } else { |  | 
| 5745                                 if((caseBits & 0xC0) == 0) { |  | 
| 5746                                     caseByte |= 0 << (--caseShift); |  | 
| 5747                                 } else { |  | 
| 5748                                     caseByte |= 1 << (--caseShift); |  | 
| 5749                                     /* second bit */ |  | 
| 5750                                     if(caseShift == 0) { |  | 
| 5751                                         dest[i++] = caseByte; |  | 
| 5752                                         caseShift = UCOL_CASE_SHIFT_START; |  | 
| 5753                                         caseByte = UCOL_CASE_BYTE_START; |  | 
| 5754                                     } |  | 
| 5755                                     caseByte |= ((caseBits>>7)&1) << (--caseShif
      t); |  | 
| 5756                                 } |  | 
| 5757                             } |  | 
| 5758                         } |  | 
| 5759 |  | 
| 5760                     } |  | 
| 5761                 } |  | 
| 5762                 // Not sure this is correct for the case level - revisit |  | 
| 5763                 if(uprv_numAvailableExpCEs(s)) { |  | 
| 5764                     canUpdateState = FALSE; |  | 
| 5765                 } else { |  | 
| 5766                     canUpdateState = TRUE; |  | 
| 5767                 } |  | 
| 5768             } |  | 
| 5769         } else { |  | 
| 5770             level = UCOL_PSK_TERTIARY; |  | 
| 5771         } |  | 
| 5772         /* fall through to next level */ |  | 
| 5773     case UCOL_PSK_TERTIARY: |  | 
| 5774         if(strength >= UCOL_TERTIARY) { |  | 
| 5775             for(;;) { |  | 
| 5776                 if(i == count) { |  | 
| 5777                     goto saveState; |  | 
| 5778                 } |  | 
| 5779                 // We should save the state only if we |  | 
| 5780                 // are sure that we are done with the |  | 
| 5781                 // previous iterator state |  | 
| 5782                 if(canUpdateState) { |  | 
| 5783                     newState = s.iterator->getState(s.iterator); |  | 
| 5784                     if(newState != UITER_NO_STATE) { |  | 
| 5785                         iterState = newState; |  | 
| 5786                         cces = 0; |  | 
| 5787                     } |  | 
| 5788                 } |  | 
| 5789                 CE = ucol_IGetNextCE(coll, &s, status); |  | 
| 5790                 cces++; |  | 
| 5791                 if(CE==UCOL_NO_MORE_CES) { |  | 
| 5792                     // Add the level separator |  | 
| 5793                     terminatePSKLevel(level, maxLevel, i, dest); |  | 
| 5794                     byteCountOrFrenchDone = 0; |  | 
| 5795                     // Restart the iteration and move to the |  | 
| 5796                     // next level |  | 
| 5797                     s.iterator->move(s.iterator, 0, UITER_START); |  | 
| 5798                     cces = 0; |  | 
| 5799                     level = UCOL_PSK_QUATERNARY; |  | 
| 5800                     break; |  | 
| 5801                 } |  | 
| 5802                 if(!isShiftedCE(CE, LVT, &wasShifted)) { |  | 
| 5803                     notIsContinuation = !isContinuation(CE); |  | 
| 5804 |  | 
| 5805                     if(notIsContinuation) { |  | 
| 5806                         CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK); |  | 
| 5807                         CE ^= coll->caseSwitch; |  | 
| 5808                         CE &= coll->tertiaryMask; |  | 
| 5809                     } else { |  | 
| 5810                         CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION)); |  | 
| 5811                     } |  | 
| 5812 |  | 
| 5813                     if(CE != 0) { |  | 
| 5814                         dest[i++]=(uint8_t)CE; |  | 
| 5815                     } |  | 
| 5816                 } |  | 
| 5817                 if(uprv_numAvailableExpCEs(s)) { |  | 
| 5818                     canUpdateState = FALSE; |  | 
| 5819                 } else { |  | 
| 5820                     canUpdateState = TRUE; |  | 
| 5821                 } |  | 
| 5822             } |  | 
| 5823         } else { |  | 
| 5824             // if we're not doing tertiary |  | 
| 5825             // skip to the end |  | 
| 5826             level = UCOL_PSK_NULL; |  | 
| 5827         } |  | 
| 5828         /* fall through to next level */ |  | 
| 5829     case UCOL_PSK_QUATERNARY: |  | 
| 5830         if(strength >= UCOL_QUATERNARY) { |  | 
| 5831             for(;;) { |  | 
| 5832                 if(i == count) { |  | 
| 5833                     goto saveState; |  | 
| 5834                 } |  | 
| 5835                 // We should save the state only if we |  | 
| 5836                 // are sure that we are done with the |  | 
| 5837                 // previous iterator state |  | 
| 5838                 if(canUpdateState) { |  | 
| 5839                     newState = s.iterator->getState(s.iterator); |  | 
| 5840                     if(newState != UITER_NO_STATE) { |  | 
| 5841                         iterState = newState; |  | 
| 5842                         cces = 0; |  | 
| 5843                     } |  | 
| 5844                 } |  | 
| 5845                 CE = ucol_IGetNextCE(coll, &s, status); |  | 
| 5846                 cces++; |  | 
| 5847                 if(CE==UCOL_NO_MORE_CES) { |  | 
| 5848                     // Add the level separator |  | 
| 5849                     terminatePSKLevel(level, maxLevel, i, dest); |  | 
| 5850                     //dest[i++] = UCOL_LEVELTERMINATOR; |  | 
| 5851                     byteCountOrFrenchDone = 0; |  | 
| 5852                     // Restart the iteration and move to the |  | 
| 5853                     // next level |  | 
| 5854                     s.iterator->move(s.iterator, 0, UITER_START); |  | 
| 5855                     cces = 0; |  | 
| 5856                     level = UCOL_PSK_QUIN; |  | 
| 5857                     break; |  | 
| 5858                 } |  | 
| 5859                 if(CE==0) |  | 
| 5860                     continue; |  | 
| 5861                 if(isShiftedCE(CE, LVT, &wasShifted)) { |  | 
| 5862                     CE >>= 16; /* get primary */ |  | 
| 5863                     if(CE != 0) { |  | 
| 5864                         if(byteCountOrFrenchDone == 0) { |  | 
| 5865                             dest[i++]=(uint8_t)(CE >> 8); |  | 
| 5866                         } else { |  | 
| 5867                             byteCountOrFrenchDone = 0; |  | 
| 5868                         } |  | 
| 5869                         if((CE &=0xff)!=0) { |  | 
| 5870                             if(i==count) { |  | 
| 5871                                 /* overflow */ |  | 
| 5872                                 byteCountOrFrenchDone = 1; |  | 
| 5873                                 goto saveState; |  | 
| 5874                             } |  | 
| 5875                             dest[i++]=(uint8_t)CE; |  | 
| 5876                         } |  | 
| 5877                     } |  | 
| 5878                 } else { |  | 
| 5879                     notIsContinuation = !isContinuation(CE); |  | 
| 5880                     if(notIsContinuation) { |  | 
| 5881                         if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana a
      nd we need to note it |  | 
| 5882                             dest[i++] = UCOL_HIRAGANA_QUAD; |  | 
| 5883                         } else { |  | 
| 5884                             dest[i++] = 0xFF; |  | 
| 5885                         } |  | 
| 5886                     } |  | 
| 5887                 } |  | 
| 5888                 if(uprv_numAvailableExpCEs(s)) { |  | 
| 5889                     canUpdateState = FALSE; |  | 
| 5890                 } else { |  | 
| 5891                     canUpdateState = TRUE; |  | 
| 5892                 } |  | 
| 5893             } |  | 
| 5894         } else { |  | 
| 5895             // if we're not doing quaternary |  | 
| 5896             // skip to the end |  | 
| 5897             level = UCOL_PSK_NULL; |  | 
| 5898         } |  | 
| 5899         /* fall through to next level */ |  | 
| 5900     case UCOL_PSK_QUIN: |  | 
| 5901         level = UCOL_PSK_IDENTICAL; |  | 
| 5902         /* fall through to next level */ |  | 
| 5903     case UCOL_PSK_IDENTICAL: |  | 
| 5904         if(strength >= UCOL_IDENTICAL) { |  | 
| 5905             UChar32 first, second; |  | 
| 5906             int32_t bocsuBytesWritten = 0; |  | 
| 5907             // We always need to do identical on |  | 
| 5908             // the NFD form of the string. |  | 
| 5909             if(normIter == NULL) { |  | 
| 5910                 // we arrived from the level below and |  | 
| 5911                 // normalization was not turned on. |  | 
| 5912                 // therefore, we need to make a fresh NFD iterator |  | 
| 5913                 normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), 
      status); |  | 
| 5914                 s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); |  | 
| 5915             } else if(!doingIdenticalFromStart) { |  | 
| 5916                 // there is an iterator, but we did some other levels. |  | 
| 5917                 // therefore, we have a FCD iterator - need to make |  | 
| 5918                 // a NFD one. |  | 
| 5919                 // normIter being at the beginning does not guarantee |  | 
| 5920                 // that the underlying iterator is at the beginning |  | 
| 5921                 iter->move(iter, 0, UITER_START); |  | 
| 5922                 s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status); |  | 
| 5923             } |  | 
| 5924             // At this point we have a NFD iterator that is positioned |  | 
| 5925             // in the right place |  | 
| 5926             if(U_FAILURE(*status)) { |  | 
| 5927                 UTRACE_EXIT_STATUS(*status); |  | 
| 5928                 return 0; |  | 
| 5929             } |  | 
| 5930             first = uiter_previous32(s.iterator); |  | 
| 5931             // maybe we're at the start of the string |  | 
| 5932             if(first == U_SENTINEL) { |  | 
| 5933                 first = 0; |  | 
| 5934             } else { |  | 
| 5935                 uiter_next32(s.iterator); |  | 
| 5936             } |  | 
| 5937 |  | 
| 5938             j = 0; |  | 
| 5939             for(;;) { |  | 
| 5940                 if(i == count) { |  | 
| 5941                     if(j+1 < bocsuBytesWritten) { |  | 
| 5942                         bocsuBytesUsed = j+1; |  | 
| 5943                     } |  | 
| 5944                     goto saveState; |  | 
| 5945                 } |  | 
| 5946 |  | 
| 5947                 // On identical level, we will always save |  | 
| 5948                 // the state if we reach this point, since |  | 
| 5949                 // we don't depend on getNextCE for content |  | 
| 5950                 // all the content is in our buffer and we |  | 
| 5951                 // already either stored the full buffer OR |  | 
| 5952                 // otherwise we won't arrive here. |  | 
| 5953                 newState = s.iterator->getState(s.iterator); |  | 
| 5954                 if(newState != UITER_NO_STATE) { |  | 
| 5955                     iterState = newState; |  | 
| 5956                     cces = 0; |  | 
| 5957                 } |  | 
| 5958 |  | 
| 5959                 uint8_t buff[4]; |  | 
| 5960                 second = uiter_next32(s.iterator); |  | 
| 5961                 cces++; |  | 
| 5962 |  | 
| 5963                 // end condition for identical level |  | 
| 5964                 if(second == U_SENTINEL) { |  | 
| 5965                     terminatePSKLevel(level, maxLevel, i, dest); |  | 
| 5966                     level = UCOL_PSK_NULL; |  | 
| 5967                     break; |  | 
| 5968                 } |  | 
| 5969                 bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, seco
      nd, buff); |  | 
| 5970                 first = second; |  | 
| 5971 |  | 
| 5972                 j = 0; |  | 
| 5973                 if(bocsuBytesUsed != 0) { |  | 
| 5974                     while(bocsuBytesUsed-->0) { |  | 
| 5975                         j++; |  | 
| 5976                     } |  | 
| 5977                 } |  | 
| 5978 |  | 
| 5979                 while(i < count && j < bocsuBytesWritten) { |  | 
| 5980                     dest[i++] = buff[j++]; |  | 
| 5981                 } |  | 
| 5982             } |  | 
| 5983 |  | 
| 5984         } else { |  | 
| 5985             level = UCOL_PSK_NULL; |  | 
| 5986         } |  | 
| 5987         /* fall through to next level */ |  | 
| 5988     case UCOL_PSK_NULL: |  | 
| 5989         j = i; |  | 
| 5990         while(j<count) { |  | 
| 5991             dest[j++]=0; |  | 
| 5992         } |  | 
| 5993         break; |  | 
| 5994     default: |  | 
| 5995         *status = U_INTERNAL_PROGRAM_ERROR; |  | 
| 5996         UTRACE_EXIT_STATUS(*status); |  | 
| 5997         return 0; |  | 
| 5998     } |  | 
| 5999 |  | 
| 6000 saveState: |  | 
| 6001     // Now we need to return stuff. First we want to see whether we have |  | 
| 6002     // done everything for the current state of iterator. |  | 
| 6003     if(byteCountOrFrenchDone |  | 
| 6004         || canUpdateState == FALSE |  | 
| 6005         || (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE) |  | 
| 6006     { |  | 
| 6007         // Any of above mean that the previous transaction |  | 
| 6008         // wasn't finished and that we should store the |  | 
| 6009         // previous iterator state. |  | 
| 6010         state[0] = iterState; |  | 
| 6011     } else { |  | 
| 6012         // The transaction is complete. We will continue in the next iteration. |  | 
| 6013         state[0] = s.iterator->getState(s.iterator); |  | 
| 6014         cces = 0; |  | 
| 6015     } |  | 
| 6016     // Store the number of bocsu bytes written. |  | 
| 6017     if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) { |  | 
| 6018         *status = U_INDEX_OUTOFBOUNDS_ERROR; |  | 
| 6019     } |  | 
| 6020     state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BY
      TES_SHIFT; |  | 
| 6021 |  | 
| 6022     // Next we put in the level of comparison |  | 
| 6023     state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT); |  | 
| 6024 |  | 
| 6025     // If we are doing French, we need to store whether we have just finished th
      e French level |  | 
| 6026     if(level == UCOL_PSK_SECONDARY && doingFrench) { |  | 
| 6027         state[1] |= (((int32_t)(state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_D
      ONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT); |  | 
| 6028     } else { |  | 
| 6029         state[1] |= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE
      _MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT); |  | 
| 6030     } |  | 
| 6031 |  | 
| 6032     // Was the latest CE shifted |  | 
| 6033     if(wasShifted) { |  | 
| 6034         state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT; |  | 
| 6035     } |  | 
| 6036     // Check for cces overflow |  | 
| 6037     if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) { |  | 
| 6038         *status = U_INDEX_OUTOFBOUNDS_ERROR; |  | 
| 6039     } |  | 
| 6040     // Store cces |  | 
| 6041     state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SH
      IFT); |  | 
| 6042 |  | 
| 6043     // Check for French overflow |  | 
| 6044     if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) { |  | 
| 6045         *status = U_INDEX_OUTOFBOUNDS_ERROR; |  | 
| 6046     } |  | 
| 6047     // Store number of bytes written in the French secondary continuation sequen
      ce |  | 
| 6048     state[1] |= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENC
      H_SHIFT); |  | 
| 6049 |  | 
| 6050 |  | 
| 6051     // If we have used normalizing iterator, get rid of it |  | 
| 6052     if(normIter != NULL) { |  | 
| 6053         unorm_closeIter(normIter); |  | 
| 6054     } |  | 
| 6055 |  | 
| 6056     /* To avoid memory leak, free the offset buffer if necessary. */ |  | 
| 6057     ucol_freeOffsetBuffer(&s); |  | 
| 6058 |  | 
| 6059     // Return number of meaningful sortkey bytes. | 227     // Return number of meaningful sortkey bytes. | 
| 6060     UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", | 228     UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", | 
| 6061                   dest,i, state[0], state[1]); | 229                   dest,i, state[0], state[1]); | 
| 6062     UTRACE_EXIT_VALUE(i); | 230     UTRACE_EXIT_VALUE_STATUS(i, *status); | 
| 6063     return i; | 231     return i; | 
| 6064 } | 232 } | 
| 6065 | 233 | 
| 6066 /** | 234 /** | 
| 6067  * Produce a bound for a given sortkey and a number of levels. | 235  * Produce a bound for a given sortkey and a number of levels. | 
| 6068  */ | 236  */ | 
| 6069 U_CAPI int32_t U_EXPORT2 | 237 U_CAPI int32_t U_EXPORT2 | 
| 6070 ucol_getBound(const uint8_t       *source, | 238 ucol_getBound(const uint8_t       *source, | 
| 6071         int32_t             sourceLength, | 239         int32_t             sourceLength, | 
| 6072         UColBoundMode       boundType, | 240         UColBoundMode       boundType, | 
| 6073         uint32_t            noOfLevels, | 241         uint32_t            noOfLevels, | 
| 6074         uint8_t             *result, | 242         uint8_t             *result, | 
| 6075         int32_t             resultLength, | 243         int32_t             resultLength, | 
| 6076         UErrorCode          *status) | 244         UErrorCode          *status) | 
| 6077 { | 245 { | 
| 6078     // consistency checks | 246     // consistency checks | 
| 6079     if(status == NULL || U_FAILURE(*status)) { | 247     if(status == NULL || U_FAILURE(*status)) { | 
| 6080         return 0; | 248         return 0; | 
| 6081     } | 249     } | 
| 6082     if(source == NULL) { | 250     if(source == NULL) { | 
| 6083         *status = U_ILLEGAL_ARGUMENT_ERROR; | 251         *status = U_ILLEGAL_ARGUMENT_ERROR; | 
| 6084         return 0; | 252         return 0; | 
| 6085     } | 253     } | 
| 6086 | 254 | 
| 6087     int32_t sourceIndex = 0; | 255     int32_t sourceIndex = 0; | 
| 6088     // Scan the string until we skip enough of the key OR reach the end of the k
      ey | 256     // Scan the string until we skip enough of the key OR reach the end of the k
      ey | 
| 6089     do { | 257     do { | 
| 6090         sourceIndex++; | 258         sourceIndex++; | 
| 6091         if(source[sourceIndex] == UCOL_LEVELTERMINATOR) { | 259         if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) { | 
| 6092             noOfLevels--; | 260             noOfLevels--; | 
| 6093         } | 261         } | 
| 6094     } while (noOfLevels > 0 | 262     } while (noOfLevels > 0 | 
| 6095         && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); | 263         && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); | 
| 6096 | 264 | 
| 6097     if((source[sourceIndex] == 0 || sourceIndex == sourceLength) | 265     if((source[sourceIndex] == 0 || sourceIndex == sourceLength) | 
| 6098         && noOfLevels > 0) { | 266         && noOfLevels > 0) { | 
| 6099             *status = U_SORT_KEY_TOO_SHORT_WARNING; | 267             *status = U_SORT_KEY_TOO_SHORT_WARNING; | 
| 6100     } | 268     } | 
| 6101 | 269 | 
| (...skipping 22 matching lines...) Expand all  Loading... | 
| 6124             return 0; | 292             return 0; | 
| 6125         } | 293         } | 
| 6126         result[sourceIndex++] = 0; | 294         result[sourceIndex++] = 0; | 
| 6127 | 295 | 
| 6128         return sourceIndex; | 296         return sourceIndex; | 
| 6129     } else { | 297     } else { | 
| 6130         return sourceIndex+boundType+1; | 298         return sourceIndex+boundType+1; | 
| 6131     } | 299     } | 
| 6132 } | 300 } | 
| 6133 | 301 | 
| 6134 /****************************************************************************/ | 302 U_CAPI void U_EXPORT2 | 
| 6135 /* Following are the functions that deal with the properties of a collator  */ | 303 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCo
      de) { | 
| 6136 /* there are new APIs and some compatibility APIs                           */ | 304     if(U_FAILURE(*pErrorCode)) { return; } | 
| 6137 /****************************************************************************/ | 305     Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode); | 
| 6138 | 306 } | 
| 6139 static inline void | 307 | 
| 6140 ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE, | 308 U_CAPI UColReorderCode U_EXPORT2 | 
| 6141                     int32_t *primShift, int32_t *secShift, int32_t *terShift) | 309 ucol_getMaxVariable(const UCollator *coll) { | 
| 6142 { | 310     return Collator::fromUCollator(coll)->getMaxVariable(); | 
| 6143     uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0; |  | 
| 6144     UBool reverseSecondary = FALSE; |  | 
| 6145     UBool continuation = isContinuation(CE); |  | 
| 6146     if(!continuation) { |  | 
| 6147         tertiary = (uint8_t)((CE & coll->tertiaryMask)); |  | 
| 6148         tertiary ^= coll->caseSwitch; |  | 
| 6149         reverseSecondary = TRUE; |  | 
| 6150     } else { |  | 
| 6151         tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION)); |  | 
| 6152         tertiary &= UCOL_REMOVE_CASE; |  | 
| 6153         reverseSecondary = FALSE; |  | 
| 6154     } |  | 
| 6155 |  | 
| 6156     secondary = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK); |  | 
| 6157     primary2 = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK); |  | 
| 6158     primary1 = (uint8_t)(CE >> 8); |  | 
| 6159 |  | 
| 6160     if(primary1 != 0) { |  | 
| 6161         if (coll->leadBytePermutationTable != NULL && !continuation) { |  | 
| 6162             primary1 = coll->leadBytePermutationTable[primary1]; |  | 
| 6163         } |  | 
| 6164 |  | 
| 6165         coll->latinOneCEs[ch] |= (primary1 << *primShift); |  | 
| 6166         *primShift -= 8; |  | 
| 6167     } |  | 
| 6168     if(primary2 != 0) { |  | 
| 6169         if(*primShift < 0) { |  | 
| 6170             coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE; |  | 
| 6171             coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; |  | 
| 6172             coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE; |  | 
| 6173             return; |  | 
| 6174         } |  | 
| 6175         coll->latinOneCEs[ch] |= (primary2 << *primShift); |  | 
| 6176         *primShift -= 8; |  | 
| 6177     } |  | 
| 6178     if(secondary != 0) { |  | 
| 6179         if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse se
      condary |  | 
| 6180             coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space fo
      r secondary |  | 
| 6181             coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << 24); |  | 
| 6182         } else { // normal case |  | 
| 6183             coll->latinOneCEs[coll->latinOneTableLen+ch] |= (secondary << *secSh
      ift); |  | 
| 6184         } |  | 
| 6185         *secShift -= 8; |  | 
| 6186     } |  | 
| 6187     if(tertiary != 0) { |  | 
| 6188         coll->latinOneCEs[2*coll->latinOneTableLen+ch] |= (tertiary << *terShift
      ); |  | 
| 6189         *terShift -= 8; |  | 
| 6190     } |  | 
| 6191 } |  | 
| 6192 |  | 
| 6193 static inline UBool |  | 
| 6194 ucol_resizeLatinOneTable(UCollator *coll, int32_t size, UErrorCode *status) { |  | 
| 6195     uint32_t *newTable = (uint32_t *)uprv_malloc(size*sizeof(uint32_t)*3); |  | 
| 6196     if(newTable == NULL) { |  | 
| 6197       *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 6198       coll->latinOneFailed = TRUE; |  | 
| 6199       return FALSE; |  | 
| 6200     } |  | 
| 6201     int32_t sizeToCopy = ((size<coll->latinOneTableLen)?size:coll->latinOneTable
      Len)*sizeof(uint32_t); |  | 
| 6202     uprv_memset(newTable, 0, size*sizeof(uint32_t)*3); |  | 
| 6203     uprv_memcpy(newTable, coll->latinOneCEs, sizeToCopy); |  | 
| 6204     uprv_memcpy(newTable+size, coll->latinOneCEs+coll->latinOneTableLen, sizeToC
      opy); |  | 
| 6205     uprv_memcpy(newTable+2*size, coll->latinOneCEs+2*coll->latinOneTableLen, siz
      eToCopy); |  | 
| 6206     coll->latinOneTableLen = size; |  | 
| 6207     uprv_free(coll->latinOneCEs); |  | 
| 6208     coll->latinOneCEs = newTable; |  | 
| 6209     return TRUE; |  | 
| 6210 } |  | 
| 6211 |  | 
| 6212 static UBool |  | 
| 6213 ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) { |  | 
| 6214     UBool result = TRUE; |  | 
| 6215     if(coll->latinOneCEs == NULL) { |  | 
| 6216         coll->latinOneCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*UCOL_LATINO
      NETABLELEN*3); |  | 
| 6217         if(coll->latinOneCEs == NULL) { |  | 
| 6218             *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 6219             return FALSE; |  | 
| 6220         } |  | 
| 6221         coll->latinOneTableLen = UCOL_LATINONETABLELEN; |  | 
| 6222     } |  | 
| 6223     UChar ch = 0; |  | 
| 6224     UCollationElements *it = ucol_openElements(coll, &ch, 1, status); |  | 
| 6225     // Check for null pointer |  | 
| 6226     if (U_FAILURE(*status)) { |  | 
| 6227         ucol_closeElements(it); |  | 
| 6228         return FALSE; |  | 
| 6229     } |  | 
| 6230     uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)*coll->latinOneTableLen*3)
      ; |  | 
| 6231 |  | 
| 6232     int32_t primShift = 24, secShift = 24, terShift = 24; |  | 
| 6233     uint32_t CE = 0; |  | 
| 6234     int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1; |  | 
| 6235 |  | 
| 6236     // TODO: make safe if you get more than you wanted... |  | 
| 6237     for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) { |  | 
| 6238         primShift = 24; secShift = 24; terShift = 24; |  | 
| 6239         if(ch < 0x100) { |  | 
| 6240             CE = coll->latinOneMapping[ch]; |  | 
| 6241         } else { |  | 
| 6242             CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch); |  | 
| 6243             if(CE == UCOL_NOT_FOUND && coll->UCA) { |  | 
| 6244                 CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch); |  | 
| 6245             } |  | 
| 6246         } |  | 
| 6247         if(CE < UCOL_NOT_FOUND) { |  | 
| 6248             ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift
      ); |  | 
| 6249         } else { |  | 
| 6250             switch (getCETag(CE)) { |  | 
| 6251             case EXPANSION_TAG: |  | 
| 6252             case DIGIT_TAG: |  | 
| 6253                 ucol_setText(it, &ch, 1, status); |  | 
| 6254                 while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) { |  | 
| 6255                     if(primShift < 0 || secShift < 0 || terShift < 0) { |  | 
| 6256                         coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE; |  | 
| 6257                         coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL
      _OUT_CE; |  | 
| 6258                         coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BA
      IL_OUT_CE; |  | 
| 6259                         break; |  | 
| 6260                     } |  | 
| 6261                     ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &
      terShift); |  | 
| 6262                 } |  | 
| 6263                 break; |  | 
| 6264             case CONTRACTION_TAG: |  | 
| 6265                 // here is the trick |  | 
| 6266                 // F2 is contraction. We do something very similar to contractio
      ns |  | 
| 6267                 // but have two indices, one in the real contraction table and t
      he |  | 
| 6268                 // other to where we stuffed things. This hopes that we don't ha
      ve |  | 
| 6269                 // many contractions (this should work for latin-1 tables). |  | 
| 6270                 { |  | 
| 6271                     if((CE & 0x00FFF000) != 0) { |  | 
| 6272                         *status = U_UNSUPPORTED_ERROR; |  | 
| 6273                         goto cleanup_after_failure; |  | 
| 6274                     } |  | 
| 6275 |  | 
| 6276                     const UChar *UCharOffset = (UChar *)coll->image+getContractO
      ffset(CE); |  | 
| 6277 |  | 
| 6278                     CE |= (contractionOffset & 0xFFF) << 12; // insert the offse
      t in latin-1 table |  | 
| 6279 |  | 
| 6280                     coll->latinOneCEs[ch] = CE; |  | 
| 6281                     coll->latinOneCEs[coll->latinOneTableLen+ch] = CE; |  | 
| 6282                     coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE; |  | 
| 6283 |  | 
| 6284                     // We're going to jump into contraction table, pick the elem
      ents |  | 
| 6285                     // and use them |  | 
| 6286                     do { |  | 
| 6287                         CE = *(coll->contractionCEs + |  | 
| 6288                             (UCharOffset - coll->contractionIndex)); |  | 
| 6289                         if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG)
       { |  | 
| 6290                             uint32_t size; |  | 
| 6291                             uint32_t i;    /* general counter */ |  | 
| 6292                             uint32_t *CEOffset = (uint32_t *)coll->image+getExpa
      nsionOffset(CE); /* find the offset to expansion table */ |  | 
| 6293                             size = getExpansionCount(CE); |  | 
| 6294                             //CE = *CEOffset++; |  | 
| 6295                             if(size != 0) { /* if there are less than 16 element
      s in expansion, we don't terminate */ |  | 
| 6296                                 for(i = 0; i<size; i++) { |  | 
| 6297                                     if(primShift < 0 || secShift < 0 || terShift
       < 0) { |  | 
| 6298                                         coll->latinOneCEs[(UChar)contractionOffs
      et] = UCOL_BAIL_OUT_CE; |  | 
| 6299                                         coll->latinOneCEs[coll->latinOneTableLen
      +(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; |  | 
| 6300                                         coll->latinOneCEs[2*coll->latinOneTableL
      en+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; |  | 
| 6301                                         break; |  | 
| 6302                                     } |  | 
| 6303                                     ucol_addLatinOneEntry(coll, (UChar)contracti
      onOffset, *CEOffset++, &primShift, &secShift, &terShift); |  | 
| 6304                                 } |  | 
| 6305                             } else { /* else, we do */ |  | 
| 6306                                 while(*CEOffset != 0) { |  | 
| 6307                                     if(primShift < 0 || secShift < 0 || terShift
       < 0) { |  | 
| 6308                                         coll->latinOneCEs[(UChar)contractionOffs
      et] = UCOL_BAIL_OUT_CE; |  | 
| 6309                                         coll->latinOneCEs[coll->latinOneTableLen
      +(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; |  | 
| 6310                                         coll->latinOneCEs[2*coll->latinOneTableL
      en+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE; |  | 
| 6311                                         break; |  | 
| 6312                                     } |  | 
| 6313                                     ucol_addLatinOneEntry(coll, (UChar)contracti
      onOffset, *CEOffset++, &primShift, &secShift, &terShift); |  | 
| 6314                                 } |  | 
| 6315                             } |  | 
| 6316                             contractionOffset++; |  | 
| 6317                         } else if(CE < UCOL_NOT_FOUND) { |  | 
| 6318                             ucol_addLatinOneEntry(coll, (UChar)contractionOffset
      ++, CE, &primShift, &secShift, &terShift); |  | 
| 6319                         } else { |  | 
| 6320                             coll->latinOneCEs[(UChar)contractionOffset] = UCOL_B
      AIL_OUT_CE; |  | 
| 6321                             coll->latinOneCEs[coll->latinOneTableLen+(UChar)cont
      ractionOffset] = UCOL_BAIL_OUT_CE; |  | 
| 6322                             coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)co
      ntractionOffset] = UCOL_BAIL_OUT_CE; |  | 
| 6323                             contractionOffset++; |  | 
| 6324                         } |  | 
| 6325                         UCharOffset++; |  | 
| 6326                         primShift = 24; secShift = 24; terShift = 24; |  | 
| 6327                         if(contractionOffset == coll->latinOneTableLen) { // we 
      need to reallocate |  | 
| 6328                             if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneT
      ableLen, status)) { |  | 
| 6329                                 goto cleanup_after_failure; |  | 
| 6330                             } |  | 
| 6331                         } |  | 
| 6332                     } while(*UCharOffset != 0xFFFF); |  | 
| 6333                 } |  | 
| 6334                 break;; |  | 
| 6335             case SPEC_PROC_TAG: |  | 
| 6336                 { |  | 
| 6337                     // 0xB7 is a precontext character defined in UCA5.1, a speci
      al |  | 
| 6338                     // handle is implemeted in order to save LatinOne table for |  | 
| 6339                     // most locales. |  | 
| 6340                     if (ch==0xb7) { |  | 
| 6341                         ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShif
      t, &terShift); |  | 
| 6342                     } |  | 
| 6343                     else { |  | 
| 6344                         goto cleanup_after_failure; |  | 
| 6345                     } |  | 
| 6346                 } |  | 
| 6347                 break; |  | 
| 6348             default: |  | 
| 6349                 goto cleanup_after_failure; |  | 
| 6350             } |  | 
| 6351         } |  | 
| 6352     } |  | 
| 6353     // compact table |  | 
| 6354     if(contractionOffset < coll->latinOneTableLen) { |  | 
| 6355         if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) { |  | 
| 6356             goto cleanup_after_failure; |  | 
| 6357         } |  | 
| 6358     } |  | 
| 6359     ucol_closeElements(it); |  | 
| 6360     return result; |  | 
| 6361 |  | 
| 6362 cleanup_after_failure: |  | 
| 6363     // status should already be set before arriving here. |  | 
| 6364     coll->latinOneFailed = TRUE; |  | 
| 6365     ucol_closeElements(it); |  | 
| 6366     return FALSE; |  | 
| 6367 } |  | 
| 6368 |  | 
| 6369 void ucol_updateInternalState(UCollator *coll, UErrorCode *status) { |  | 
| 6370     if(U_SUCCESS(*status)) { |  | 
| 6371         if(coll->caseFirst == UCOL_UPPER_FIRST) { |  | 
| 6372             coll->caseSwitch = UCOL_CASE_SWITCH; |  | 
| 6373         } else { |  | 
| 6374             coll->caseSwitch = UCOL_NO_CASE_SWITCH; |  | 
| 6375         } |  | 
| 6376 |  | 
| 6377         if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) { |  | 
| 6378             coll->tertiaryMask = UCOL_REMOVE_CASE; |  | 
| 6379             coll->tertiaryCommon = UCOL_COMMON3_NORMAL; |  | 
| 6380             coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* 
      Should be 0x80 */ |  | 
| 6381             coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF; |  | 
| 6382             coll->tertiaryBottom = UCOL_COMMON_BOT3; |  | 
| 6383         } else { |  | 
| 6384             coll->tertiaryMask = UCOL_KEEP_CASE; |  | 
| 6385             coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON; |  | 
| 6386             if(coll->caseFirst == UCOL_UPPER_FIRST) { |  | 
| 6387                 coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST; |  | 
| 6388                 coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER; |  | 
| 6389                 coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER; |  | 
| 6390             } else { |  | 
| 6391                 coll->tertiaryCommon = UCOL_COMMON3_NORMAL; |  | 
| 6392                 coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER; |  | 
| 6393                 coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER; |  | 
| 6394             } |  | 
| 6395         } |  | 
| 6396 |  | 
| 6397         /* Set the compression values */ |  | 
| 6398         uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - coll->tertiaryBott
      om - 1); |  | 
| 6399         coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* w
      e multilply double with int, but need only int */ |  | 
| 6400         coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopC
      ount); |  | 
| 6401 |  | 
| 6402         if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY |  | 
| 6403             && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == U
      COL_NON_IGNORABLE) |  | 
| 6404         { |  | 
| 6405             coll->sortKeyGen = ucol_calcSortKeySimpleTertiary; |  | 
| 6406         } else { |  | 
| 6407             coll->sortKeyGen = ucol_calcSortKey; |  | 
| 6408         } |  | 
| 6409         if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && col
      l->numericCollation == UCOL_OFF |  | 
| 6410             && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneF
      ailed) |  | 
| 6411         { |  | 
| 6412             if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) { |  | 
| 6413                 if(ucol_setUpLatinOne(coll, status)) { // if we succeed in build
      ing latin1 table, we'll use it |  | 
| 6414                     //fprintf(stderr, "F"); |  | 
| 6415                     coll->latinOneUse = TRUE; |  | 
| 6416                 } else { |  | 
| 6417                     coll->latinOneUse = FALSE; |  | 
| 6418                 } |  | 
| 6419                 if(*status == U_UNSUPPORTED_ERROR) { |  | 
| 6420                     *status = U_ZERO_ERROR; |  | 
| 6421                 } |  | 
| 6422             } else { // latin1Table exists and it doesn't need to be regenerated
      , just use it |  | 
| 6423                 coll->latinOneUse = TRUE; |  | 
| 6424             } |  | 
| 6425         } else { |  | 
| 6426             coll->latinOneUse = FALSE; |  | 
| 6427         } |  | 
| 6428     } |  | 
| 6429 } | 311 } | 
| 6430 | 312 | 
| 6431 U_CAPI uint32_t  U_EXPORT2 | 313 U_CAPI uint32_t  U_EXPORT2 | 
| 6432 ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCod
      e *status) { | 314 ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCod
      e *status) { | 
| 6433     if(U_FAILURE(*status) || coll == NULL) { | 315     if(U_FAILURE(*status) || coll == NULL) { | 
| 6434         return 0; | 316         return 0; | 
| 6435     } | 317     } | 
| 6436     if(len == -1) { | 318     return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status); | 
| 6437         len = u_strlen(varTop); |  | 
| 6438     } |  | 
| 6439     if(len == 0) { |  | 
| 6440         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6441         return 0; |  | 
| 6442     } |  | 
| 6443 |  | 
| 6444     if(coll->delegate!=NULL) { |  | 
| 6445       return ((Collator*)coll->delegate)->setVariableTop(varTop, len, *status); |  | 
| 6446     } |  | 
| 6447 |  | 
| 6448 |  | 
| 6449     collIterate s; |  | 
| 6450     IInit_collIterate(coll, varTop, len, &s, status); |  | 
| 6451     if(U_FAILURE(*status)) { |  | 
| 6452         return 0; |  | 
| 6453     } |  | 
| 6454 |  | 
| 6455     uint32_t CE = ucol_IGetNextCE(coll, &s, status); |  | 
| 6456 |  | 
| 6457     /* here we check if we have consumed all characters */ |  | 
| 6458     /* you can put in either one character or a contraction */ |  | 
| 6459     /* you shouldn't put more... */ |  | 
| 6460     if(s.pos != s.endp || CE == UCOL_NO_MORE_CES) { |  | 
| 6461         *status = U_CE_NOT_FOUND_ERROR; |  | 
| 6462         return 0; |  | 
| 6463     } |  | 
| 6464 |  | 
| 6465     uint32_t nextCE = ucol_IGetNextCE(coll, &s, status); |  | 
| 6466 |  | 
| 6467     if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) { |  | 
| 6468         *status = U_PRIMARY_TOO_LONG_ERROR; |  | 
| 6469         return 0; |  | 
| 6470     } |  | 
| 6471     if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) { |  | 
| 6472         coll->variableTopValueisDefault = FALSE; |  | 
| 6473         coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16; |  | 
| 6474     } |  | 
| 6475 |  | 
| 6476     /* To avoid memory leak, free the offset buffer if necessary. */ |  | 
| 6477     ucol_freeOffsetBuffer(&s); |  | 
| 6478 |  | 
| 6479     return CE & UCOL_PRIMARYMASK; |  | 
| 6480 } | 319 } | 
| 6481 | 320 | 
| 6482 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode 
      *status) { | 321 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode 
      *status) { | 
| 6483     if(U_FAILURE(*status) || coll == NULL) { | 322     if(U_FAILURE(*status) || coll == NULL) { | 
| 6484         return 0; | 323         return 0; | 
| 6485     } | 324     } | 
| 6486     if(coll->delegate!=NULL) { | 325     return Collator::fromUCollator(coll)->getVariableTop(*status); | 
| 6487       return ((const Collator*)coll->delegate)->getVariableTop(*status); |  | 
| 6488     } |  | 
| 6489     return coll->variableTopValue<<16; |  | 
| 6490 } | 326 } | 
| 6491 | 327 | 
| 6492 U_CAPI void  U_EXPORT2 | 328 U_CAPI void  U_EXPORT2 | 
| 6493 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *stat
      us) { | 329 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *stat
      us) { | 
| 6494     if(U_FAILURE(*status) || coll == NULL) { | 330     if(U_FAILURE(*status) || coll == NULL) { | 
| 6495         return; | 331         return; | 
| 6496     } | 332     } | 
| 6497 | 333     Collator::fromUCollator(coll)->setVariableTop(varTop, *status); | 
| 6498     if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) { | 334 } | 
| 6499         coll->variableTopValueisDefault = FALSE; | 335 | 
| 6500         coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16; |  | 
| 6501     } |  | 
| 6502 } |  | 
| 6503 /* Attribute setter API */ |  | 
| 6504 U_CAPI void  U_EXPORT2 | 336 U_CAPI void  U_EXPORT2 | 
| 6505 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value,
       UErrorCode *status) { | 337 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value,
       UErrorCode *status) { | 
| 6506     if(U_FAILURE(*status) || coll == NULL) { | 338     if(U_FAILURE(*status) || coll == NULL) { | 
| 6507       return; | 339       return; | 
| 6508     } | 340     } | 
| 6509 | 341 | 
| 6510     if(coll->delegate != NULL) { | 342     Collator::fromUCollator(coll)->setAttribute(attr, value, *status); | 
| 6511       ((Collator*)coll->delegate)->setAttribute(attr,value,*status); |  | 
| 6512       return; |  | 
| 6513     } |  | 
| 6514 |  | 
| 6515     UColAttributeValue oldFrench = coll->frenchCollation; |  | 
| 6516     UColAttributeValue oldCaseFirst = coll->caseFirst; |  | 
| 6517     switch(attr) { |  | 
| 6518     case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */ |  | 
| 6519         if(value == UCOL_ON) { |  | 
| 6520             coll->numericCollation = UCOL_ON; |  | 
| 6521             coll->numericCollationisDefault = FALSE; |  | 
| 6522         } else if (value == UCOL_OFF) { |  | 
| 6523             coll->numericCollation = UCOL_OFF; |  | 
| 6524             coll->numericCollationisDefault = FALSE; |  | 
| 6525         } else if (value == UCOL_DEFAULT) { |  | 
| 6526             coll->numericCollationisDefault = TRUE; |  | 
| 6527             coll->numericCollation = (UColAttributeValue)coll->options->numericC
      ollation; |  | 
| 6528         } else { |  | 
| 6529             *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6530         } |  | 
| 6531         break; |  | 
| 6532     case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragan
      a */ |  | 
| 6533         if(value == UCOL_ON || value == UCOL_OFF || value == UCOL_DEFAULT) { |  | 
| 6534             // This attribute is an implementation detail of the CLDR Japanese t
      ailoring. |  | 
| 6535             // The implementation might change to use a different mechanism |  | 
| 6536             // to achieve the same Japanese sort order. |  | 
| 6537             // Since ICU 50, this attribute is not settable any more via API fun
      ctions. |  | 
| 6538         } else { |  | 
| 6539             *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6540         } |  | 
| 6541         break; |  | 
| 6542     case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*
      / |  | 
| 6543         if(value == UCOL_ON) { |  | 
| 6544             coll->frenchCollation = UCOL_ON; |  | 
| 6545             coll->frenchCollationisDefault = FALSE; |  | 
| 6546         } else if (value == UCOL_OFF) { |  | 
| 6547             coll->frenchCollation = UCOL_OFF; |  | 
| 6548             coll->frenchCollationisDefault = FALSE; |  | 
| 6549         } else if (value == UCOL_DEFAULT) { |  | 
| 6550             coll->frenchCollationisDefault = TRUE; |  | 
| 6551             coll->frenchCollation = (UColAttributeValue)coll->options->frenchCol
      lation; |  | 
| 6552         } else { |  | 
| 6553             *status = U_ILLEGAL_ARGUMENT_ERROR  ; |  | 
| 6554         } |  | 
| 6555         break; |  | 
| 6556     case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ |  | 
| 6557         if(value == UCOL_SHIFTED) { |  | 
| 6558             coll->alternateHandling = UCOL_SHIFTED; |  | 
| 6559             coll->alternateHandlingisDefault = FALSE; |  | 
| 6560         } else if (value == UCOL_NON_IGNORABLE) { |  | 
| 6561             coll->alternateHandling = UCOL_NON_IGNORABLE; |  | 
| 6562             coll->alternateHandlingisDefault = FALSE; |  | 
| 6563         } else if (value == UCOL_DEFAULT) { |  | 
| 6564             coll->alternateHandlingisDefault = TRUE; |  | 
| 6565             coll->alternateHandling = (UColAttributeValue)coll->options->alterna
      teHandling ; |  | 
| 6566         } else { |  | 
| 6567             *status = U_ILLEGAL_ARGUMENT_ERROR  ; |  | 
| 6568         } |  | 
| 6569         break; |  | 
| 6570     case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ |  | 
| 6571         if(value == UCOL_LOWER_FIRST) { |  | 
| 6572             coll->caseFirst = UCOL_LOWER_FIRST; |  | 
| 6573             coll->caseFirstisDefault = FALSE; |  | 
| 6574         } else if (value == UCOL_UPPER_FIRST) { |  | 
| 6575             coll->caseFirst = UCOL_UPPER_FIRST; |  | 
| 6576             coll->caseFirstisDefault = FALSE; |  | 
| 6577         } else if (value == UCOL_OFF) { |  | 
| 6578             coll->caseFirst = UCOL_OFF; |  | 
| 6579             coll->caseFirstisDefault = FALSE; |  | 
| 6580         } else if (value == UCOL_DEFAULT) { |  | 
| 6581             coll->caseFirst = (UColAttributeValue)coll->options->caseFirst; |  | 
| 6582             coll->caseFirstisDefault = TRUE; |  | 
| 6583         } else { |  | 
| 6584             *status = U_ILLEGAL_ARGUMENT_ERROR  ; |  | 
| 6585         } |  | 
| 6586         break; |  | 
| 6587     case UCOL_CASE_LEVEL: /* do we have an extra case level */ |  | 
| 6588         if(value == UCOL_ON) { |  | 
| 6589             coll->caseLevel = UCOL_ON; |  | 
| 6590             coll->caseLevelisDefault = FALSE; |  | 
| 6591         } else if (value == UCOL_OFF) { |  | 
| 6592             coll->caseLevel = UCOL_OFF; |  | 
| 6593             coll->caseLevelisDefault = FALSE; |  | 
| 6594         } else if (value == UCOL_DEFAULT) { |  | 
| 6595             coll->caseLevel = (UColAttributeValue)coll->options->caseLevel; |  | 
| 6596             coll->caseLevelisDefault = TRUE; |  | 
| 6597         } else { |  | 
| 6598             *status = U_ILLEGAL_ARGUMENT_ERROR  ; |  | 
| 6599         } |  | 
| 6600         break; |  | 
| 6601     case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ |  | 
| 6602         if(value == UCOL_ON) { |  | 
| 6603             coll->normalizationMode = UCOL_ON; |  | 
| 6604             coll->normalizationModeisDefault = FALSE; |  | 
| 6605             initializeFCD(status); |  | 
| 6606         } else if (value == UCOL_OFF) { |  | 
| 6607             coll->normalizationMode = UCOL_OFF; |  | 
| 6608             coll->normalizationModeisDefault = FALSE; |  | 
| 6609         } else if (value == UCOL_DEFAULT) { |  | 
| 6610             coll->normalizationModeisDefault = TRUE; |  | 
| 6611             coll->normalizationMode = (UColAttributeValue)coll->options->normali
      zationMode; |  | 
| 6612             if(coll->normalizationMode == UCOL_ON) { |  | 
| 6613                 initializeFCD(status); |  | 
| 6614             } |  | 
| 6615         } else { |  | 
| 6616             *status = U_ILLEGAL_ARGUMENT_ERROR  ; |  | 
| 6617         } |  | 
| 6618         break; |  | 
| 6619     case UCOL_STRENGTH:         /* attribute for strength */ |  | 
| 6620         if (value == UCOL_DEFAULT) { |  | 
| 6621             coll->strengthisDefault = TRUE; |  | 
| 6622             coll->strength = (UColAttributeValue)coll->options->strength; |  | 
| 6623         } else if (value <= UCOL_IDENTICAL) { |  | 
| 6624             coll->strengthisDefault = FALSE; |  | 
| 6625             coll->strength = value; |  | 
| 6626         } else { |  | 
| 6627             *status = U_ILLEGAL_ARGUMENT_ERROR  ; |  | 
| 6628         } |  | 
| 6629         break; |  | 
| 6630     case UCOL_ATTRIBUTE_COUNT: |  | 
| 6631     default: |  | 
| 6632         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6633         break; |  | 
| 6634     } |  | 
| 6635     if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) { |  | 
| 6636         coll->latinOneRegenTable = TRUE; |  | 
| 6637     } else { |  | 
| 6638         coll->latinOneRegenTable = FALSE; |  | 
| 6639     } |  | 
| 6640     ucol_updateInternalState(coll, status); |  | 
| 6641 } | 343 } | 
| 6642 | 344 | 
| 6643 U_CAPI UColAttributeValue  U_EXPORT2 | 345 U_CAPI UColAttributeValue  U_EXPORT2 | 
| 6644 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
       { | 346 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
       { | 
| 6645     if(U_FAILURE(*status) || coll == NULL) { | 347     if(U_FAILURE(*status) || coll == NULL) { | 
| 6646       return UCOL_DEFAULT; | 348       return UCOL_DEFAULT; | 
| 6647     } | 349     } | 
| 6648 | 350 | 
| 6649     if(coll->delegate != NULL) { | 351     return Collator::fromUCollator(coll)->getAttribute(attr, *status); | 
| 6650       return ((Collator*)coll->delegate)->getAttribute(attr,*status); |  | 
| 6651     } |  | 
| 6652 |  | 
| 6653     switch(attr) { |  | 
| 6654     case UCOL_NUMERIC_COLLATION: |  | 
| 6655       return coll->numericCollation; |  | 
| 6656     case UCOL_HIRAGANA_QUATERNARY_MODE: |  | 
| 6657       return coll->hiraganaQ; |  | 
| 6658     case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*
      / |  | 
| 6659         return coll->frenchCollation; |  | 
| 6660     case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/ |  | 
| 6661         return coll->alternateHandling; |  | 
| 6662     case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */ |  | 
| 6663         return coll->caseFirst; |  | 
| 6664     case UCOL_CASE_LEVEL: /* do we have an extra case level */ |  | 
| 6665         return coll->caseLevel; |  | 
| 6666     case UCOL_NORMALIZATION_MODE: /* attribute for normalization */ |  | 
| 6667         return coll->normalizationMode; |  | 
| 6668     case UCOL_STRENGTH:         /* attribute for strength */ |  | 
| 6669         return coll->strength; |  | 
| 6670     case UCOL_ATTRIBUTE_COUNT: |  | 
| 6671     default: |  | 
| 6672         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6673         break; |  | 
| 6674     } |  | 
| 6675     return UCOL_DEFAULT; |  | 
| 6676 } | 352 } | 
| 6677 | 353 | 
| 6678 U_CAPI void U_EXPORT2 | 354 U_CAPI void U_EXPORT2 | 
| 6679 ucol_setStrength(    UCollator                *coll, | 355 ucol_setStrength(    UCollator                *coll, | 
| 6680             UCollationStrength        strength) | 356             UCollationStrength        strength) | 
| 6681 { | 357 { | 
| 6682     UErrorCode status = U_ZERO_ERROR; | 358     UErrorCode status = U_ZERO_ERROR; | 
| 6683     ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); | 359     ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); | 
| 6684 } | 360 } | 
| 6685 | 361 | 
| 6686 U_CAPI UCollationStrength U_EXPORT2 | 362 U_CAPI UCollationStrength U_EXPORT2 | 
| 6687 ucol_getStrength(const UCollator *coll) | 363 ucol_getStrength(const UCollator *coll) | 
| 6688 { | 364 { | 
| 6689     UErrorCode status = U_ZERO_ERROR; | 365     UErrorCode status = U_ZERO_ERROR; | 
| 6690     return ucol_getAttribute(coll, UCOL_STRENGTH, &status); | 366     return ucol_getAttribute(coll, UCOL_STRENGTH, &status); | 
| 6691 } | 367 } | 
| 6692 | 368 | 
| 6693 U_CAPI int32_t U_EXPORT2 | 369 U_CAPI int32_t U_EXPORT2 | 
| 6694 ucol_getReorderCodes(const UCollator *coll, | 370 ucol_getReorderCodes(const UCollator *coll, | 
| 6695                     int32_t *dest, | 371                     int32_t *dest, | 
| 6696                     int32_t destCapacity, | 372                     int32_t destCapacity, | 
| 6697                     UErrorCode *status) { | 373                     UErrorCode *status) { | 
| 6698     if (U_FAILURE(*status)) { | 374     if (U_FAILURE(*status)) { | 
| 6699         return 0; | 375         return 0; | 
| 6700     } | 376     } | 
| 6701 | 377 | 
| 6702     if(coll->delegate!=NULL) { | 378     return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *s
      tatus); | 
| 6703       return ((const Collator*)coll->delegate)->getReorderCodes(dest, destCapaci
      ty, *status); |  | 
| 6704     } |  | 
| 6705 |  | 
| 6706     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { |  | 
| 6707         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6708         return 0; |  | 
| 6709     } |  | 
| 6710 |  | 
| 6711 #ifdef UCOL_DEBUG |  | 
| 6712     printf("coll->reorderCodesLength = %d\n", coll->reorderCodesLength); |  | 
| 6713     printf("coll->defaultReorderCodesLength = %d\n", coll->defaultReorderCodesLe
      ngth); |  | 
| 6714 #endif |  | 
| 6715 |  | 
| 6716     if (coll->reorderCodesLength > destCapacity) { |  | 
| 6717         *status = U_BUFFER_OVERFLOW_ERROR; |  | 
| 6718         return coll->reorderCodesLength; |  | 
| 6719     } |  | 
| 6720     for (int32_t i = 0; i < coll->reorderCodesLength; i++) { |  | 
| 6721         dest[i] = coll->reorderCodes[i]; |  | 
| 6722     } |  | 
| 6723     return coll->reorderCodesLength; |  | 
| 6724 } | 379 } | 
| 6725 | 380 | 
| 6726 U_CAPI void U_EXPORT2 | 381 U_CAPI void U_EXPORT2 | 
| 6727 ucol_setReorderCodes(UCollator* coll, | 382 ucol_setReorderCodes(UCollator* coll, | 
| 6728                     const int32_t* reorderCodes, | 383                     const int32_t* reorderCodes, | 
| 6729                     int32_t reorderCodesLength, | 384                     int32_t reorderCodesLength, | 
| 6730                     UErrorCode *status) { | 385                     UErrorCode *status) { | 
| 6731     if (U_FAILURE(*status)) { | 386     if (U_FAILURE(*status)) { | 
| 6732         return; | 387         return; | 
| 6733     } | 388     } | 
| 6734 | 389 | 
| 6735     if (reorderCodesLength < 0 || (reorderCodesLength > 0 && reorderCodes == NUL
      L)) { | 390     Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLen
      gth, *status); | 
| 6736         *status = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6737         return; |  | 
| 6738     } |  | 
| 6739 |  | 
| 6740     if(coll->delegate!=NULL) { |  | 
| 6741       ((Collator*)coll->delegate)->setReorderCodes(reorderCodes, reorderCodesLen
      gth, *status); |  | 
| 6742       return; |  | 
| 6743     } |  | 
| 6744 |  | 
| 6745     if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) { |  | 
| 6746         uprv_free(coll->reorderCodes); |  | 
| 6747     } |  | 
| 6748     coll->reorderCodes = NULL; |  | 
| 6749     coll->freeReorderCodesOnClose = FALSE; |  | 
| 6750     coll->reorderCodesLength = 0; |  | 
| 6751     if (reorderCodesLength == 0) { |  | 
| 6752         if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutat
      ionTableOnClose == TRUE) { |  | 
| 6753             uprv_free(coll->leadBytePermutationTable); |  | 
| 6754         } |  | 
| 6755         coll->leadBytePermutationTable = NULL; |  | 
| 6756         coll->freeLeadBytePermutationTableOnClose = FALSE; |  | 
| 6757         return; |  | 
| 6758     } |  | 
| 6759     coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int3
      2_t)); |  | 
| 6760     if (coll->reorderCodes == NULL) { |  | 
| 6761         *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 6762         return; |  | 
| 6763     } |  | 
| 6764     coll->freeReorderCodesOnClose = TRUE; |  | 
| 6765     for (int32_t i = 0; i < reorderCodesLength; i++) { |  | 
| 6766         coll->reorderCodes[i] = reorderCodes[i]; |  | 
| 6767     } |  | 
| 6768     coll->reorderCodesLength = reorderCodesLength; |  | 
| 6769     ucol_buildPermutationTable(coll, status); |  | 
| 6770 } | 391 } | 
| 6771 | 392 | 
| 6772 U_CAPI int32_t U_EXPORT2 | 393 U_CAPI int32_t U_EXPORT2 | 
| 6773 ucol_getEquivalentReorderCodes(int32_t reorderCode, | 394 ucol_getEquivalentReorderCodes(int32_t reorderCode, | 
| 6774                     int32_t* dest, | 395                     int32_t* dest, | 
| 6775                     int32_t destCapacity, | 396                     int32_t destCapacity, | 
| 6776                     UErrorCode *pErrorCode) { | 397                     UErrorCode *pErrorCode) { | 
| 6777     bool equivalentCodesSet[USCRIPT_CODE_LIMIT]; | 398     return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, 
      *pErrorCode); | 
| 6778     uint16_t leadBytes[256]; | 399 } | 
| 6779     int leadBytesCount; |  | 
| 6780     int leadByteIndex; |  | 
| 6781     int16_t reorderCodesForLeadByte[USCRIPT_CODE_LIMIT]; |  | 
| 6782     int reorderCodesForLeadByteCount; |  | 
| 6783     int reorderCodeIndex; |  | 
| 6784 |  | 
| 6785     int32_t equivalentCodesCount = 0; |  | 
| 6786     int setIndex; |  | 
| 6787 |  | 
| 6788     if (U_FAILURE(*pErrorCode)) { |  | 
| 6789         return 0; |  | 
| 6790     } |  | 
| 6791 |  | 
| 6792     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { |  | 
| 6793         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |  | 
| 6794         return 0; |  | 
| 6795     } |  | 
| 6796 |  | 
| 6797     uprv_memset(equivalentCodesSet, 0, USCRIPT_CODE_LIMIT * sizeof(bool)); |  | 
| 6798 |  | 
| 6799     const UCollator* uca = ucol_initUCA(pErrorCode); |  | 
| 6800     if (U_FAILURE(*pErrorCode)) { |  | 
| 6801 »       return 0; |  | 
| 6802     } |  | 
| 6803     leadBytesCount = ucol_getLeadBytesForReorderCode(uca, reorderCode, leadBytes
      , 256); |  | 
| 6804     for (leadByteIndex = 0; leadByteIndex < leadBytesCount; leadByteIndex++) { |  | 
| 6805         reorderCodesForLeadByteCount = ucol_getReorderCodesForLeadByte( |  | 
| 6806             uca, leadBytes[leadByteIndex], reorderCodesForLeadByte, USCRIPT_CODE
      _LIMIT); |  | 
| 6807         for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodesForLeadByteCou
      nt; reorderCodeIndex++) { |  | 
| 6808             equivalentCodesSet[reorderCodesForLeadByte[reorderCodeIndex]] = true
      ; |  | 
| 6809         } |  | 
| 6810     } |  | 
| 6811 |  | 
| 6812     for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) { |  | 
| 6813         if (equivalentCodesSet[setIndex] == true) { |  | 
| 6814             equivalentCodesCount++; |  | 
| 6815         } |  | 
| 6816     } |  | 
| 6817 |  | 
| 6818     if (destCapacity == 0) { |  | 
| 6819         return equivalentCodesCount; |  | 
| 6820     } |  | 
| 6821 |  | 
| 6822     equivalentCodesCount = 0; |  | 
| 6823     for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) { |  | 
| 6824         if (equivalentCodesSet[setIndex] == true) { |  | 
| 6825             dest[equivalentCodesCount++] = setIndex; |  | 
| 6826             if (equivalentCodesCount >= destCapacity) { |  | 
| 6827                 break; |  | 
| 6828             } |  | 
| 6829         } |  | 
| 6830     } |  | 
| 6831     return equivalentCodesCount; |  | 
| 6832 } |  | 
| 6833 |  | 
| 6834 |  | 
| 6835 /****************************************************************************/ |  | 
| 6836 /* Following are misc functions                                             */ |  | 
| 6837 /* there are new APIs and some compatibility APIs                           */ |  | 
| 6838 /****************************************************************************/ |  | 
| 6839 | 400 | 
| 6840 U_CAPI void U_EXPORT2 | 401 U_CAPI void U_EXPORT2 | 
| 6841 ucol_getVersion(const UCollator* coll, | 402 ucol_getVersion(const UCollator* coll, | 
| 6842                 UVersionInfo versionInfo) | 403                 UVersionInfo versionInfo) | 
| 6843 { | 404 { | 
| 6844     if(coll->delegate!=NULL) { | 405     Collator::fromUCollator(coll)->getVersion(versionInfo); | 
| 6845       ((const Collator*)coll->delegate)->getVersion(versionInfo); |  | 
| 6846       return; |  | 
| 6847     } |  | 
| 6848     /* RunTime version  */ |  | 
| 6849     uint8_t rtVersion = UCOL_RUNTIME_VERSION; |  | 
| 6850     /* Builder version*/ |  | 
| 6851     uint8_t bdVersion = coll->image->version[0]; |  | 
| 6852 |  | 
| 6853     /* Charset Version. Need to get the version from cnv files |  | 
| 6854      * makeconv should populate cnv files with version and |  | 
| 6855      * an api has to be provided in ucnv.h to obtain this version |  | 
| 6856      */ |  | 
| 6857     uint8_t csVersion = 0; |  | 
| 6858 |  | 
| 6859     /* combine the version info */ |  | 
| 6860     uint16_t cmbVersion = (uint16_t)((rtVersion<<11) | (bdVersion<<6) | (csVersi
      on)); |  | 
| 6861 |  | 
| 6862     /* Tailoring rules */ |  | 
| 6863     versionInfo[0] = (uint8_t)(cmbVersion>>8); |  | 
| 6864     versionInfo[1] = (uint8_t)cmbVersion; |  | 
| 6865     versionInfo[2] = coll->image->version[1]; |  | 
| 6866     if(coll->UCA) { |  | 
| 6867         /* Include the minor number when getting the UCA version. (major & 1f) <
      < 3 | (minor & 7) */ |  | 
| 6868         versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 | (coll->
      UCA->image->UCAVersion[1] & 0x07); |  | 
| 6869     } else { |  | 
| 6870         versionInfo[3] = 0; |  | 
| 6871     } |  | 
| 6872 } |  | 
| 6873 |  | 
| 6874 |  | 
| 6875 /* This internal API checks whether a character is tailored or not */ |  | 
| 6876 U_CAPI UBool  U_EXPORT2 |  | 
| 6877 ucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) { |  | 
| 6878     if(U_FAILURE(*status) || coll == NULL || coll == coll->UCA) { |  | 
| 6879         return FALSE; |  | 
| 6880     } |  | 
| 6881 |  | 
| 6882     uint32_t CE = UCOL_NOT_FOUND; |  | 
| 6883     const UChar *ContractionStart = NULL; |  | 
| 6884     if(u < 0x100) { /* latin-1 */ |  | 
| 6885         CE = coll->latinOneMapping[u]; |  | 
| 6886         if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) { |  | 
| 6887             return FALSE; |  | 
| 6888         } |  | 
| 6889     } else { /* regular */ |  | 
| 6890         CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u); |  | 
| 6891     } |  | 
| 6892 |  | 
| 6893     if(isContraction(CE)) { |  | 
| 6894         ContractionStart = (UChar *)coll->image+getContractOffset(CE); |  | 
| 6895         CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex)
      ); |  | 
| 6896     } |  | 
| 6897 |  | 
| 6898     return (UBool)(CE != UCOL_NOT_FOUND); |  | 
| 6899 } |  | 
| 6900 |  | 
| 6901 |  | 
| 6902 /****************************************************************************/ |  | 
| 6903 /* Following are the string compare functions                               */ |  | 
| 6904 /*                                                                          */ |  | 
| 6905 /****************************************************************************/ |  | 
| 6906 |  | 
| 6907 |  | 
| 6908 /*  ucol_checkIdent    internal function.  Does byte level string compare.   */ |  | 
| 6909 /*                     Used by strcoll if strength == identical and strings  */ |  | 
| 6910 /*                     are otherwise equal.                                  */ |  | 
| 6911 /*                                                                           */ |  | 
| 6912 /*                     Comparison must be done on NFD normalized strings.    */ |  | 
| 6913 /*                     FCD is not good enough.                               */ |  | 
| 6914 |  | 
| 6915 static |  | 
| 6916 UCollationResult    ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
      l normalize, UErrorCode *status) |  | 
| 6917 { |  | 
| 6918     // When we arrive here, we can have normal strings or UCharIterators. Curren
      tly they are both |  | 
| 6919     // of same type, but that doesn't really mean that it will stay that way. |  | 
| 6920     int32_t            comparison; |  | 
| 6921 |  | 
| 6922     if (sColl->flags & UCOL_USE_ITERATOR) { |  | 
| 6923         // The division for the array length may truncate the array size to |  | 
| 6924         // a little less than UNORM_ITER_SIZE, but that size is dimensioned too 
      high |  | 
| 6925         // for all platforms anyway. |  | 
| 6926         UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; |  | 
| 6927         UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; |  | 
| 6928         UNormIterator *sNIt = NULL, *tNIt = NULL; |  | 
| 6929         sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status); |  | 
| 6930         tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status); |  | 
| 6931         sColl->iterator->move(sColl->iterator, 0, UITER_START); |  | 
| 6932         tColl->iterator->move(tColl->iterator, 0, UITER_START); |  | 
| 6933         UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, sta
      tus); |  | 
| 6934         UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, sta
      tus); |  | 
| 6935         comparison = u_strCompareIter(sIt, tIt, TRUE); |  | 
| 6936         unorm_closeIter(sNIt); |  | 
| 6937         unorm_closeIter(tNIt); |  | 
| 6938     } else { |  | 
| 6939         int32_t sLen      = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl-
      >endp - sColl->string) : -1; |  | 
| 6940         const UChar *sBuf = sColl->string; |  | 
| 6941         int32_t tLen      = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl-
      >endp - tColl->string) : -1; |  | 
| 6942         const UChar *tBuf = tColl->string; |  | 
| 6943 |  | 
| 6944         if (normalize) { |  | 
| 6945             *status = U_ZERO_ERROR; |  | 
| 6946             // Note: We could use Normalizer::compare() or similar, but for shor
      t strings |  | 
| 6947             // which may not be in FCD it might be faster to just NFD them. |  | 
| 6948             // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather tha
      n |  | 
| 6949             // NFD'ing immediately might be faster for long strings, |  | 
| 6950             // but string comparison is usually done on relatively short strings
      . |  | 
| 6951             sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN
      ) == 0, sBuf, sLen), |  | 
| 6952                                   sColl->writableBuffer, |  | 
| 6953                                   *status); |  | 
| 6954             tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN
      ) == 0, tBuf, tLen), |  | 
| 6955                                   tColl->writableBuffer, |  | 
| 6956                                   *status); |  | 
| 6957             if(U_FAILURE(*status)) { |  | 
| 6958                 return UCOL_LESS; |  | 
| 6959             } |  | 
| 6960             comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writ
      ableBuffer); |  | 
| 6961         } else { |  | 
| 6962             comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE); |  | 
| 6963         } |  | 
| 6964     } |  | 
| 6965 |  | 
| 6966     if (comparison < 0) { |  | 
| 6967         return UCOL_LESS; |  | 
| 6968     } else if (comparison == 0) { |  | 
| 6969         return UCOL_EQUAL; |  | 
| 6970     } else /* comparison > 0 */ { |  | 
| 6971         return UCOL_GREATER; |  | 
| 6972     } |  | 
| 6973 } |  | 
| 6974 |  | 
| 6975 /*  CEBuf - A struct and some inline functions to handle the saving    */ |  | 
| 6976 /*          of CEs in a buffer within ucol_strcoll                     */ |  | 
| 6977 |  | 
| 6978 #define UCOL_CEBUF_SIZE 512 |  | 
| 6979 typedef struct ucol_CEBuf { |  | 
| 6980     uint32_t    *buf; |  | 
| 6981     uint32_t    *endp; |  | 
| 6982     uint32_t    *pos; |  | 
| 6983     uint32_t     localArray[UCOL_CEBUF_SIZE]; |  | 
| 6984 } ucol_CEBuf; |  | 
| 6985 |  | 
| 6986 |  | 
| 6987 static |  | 
| 6988 inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) { |  | 
| 6989     (b)->buf = (b)->pos = (b)->localArray; |  | 
| 6990     (b)->endp = (b)->buf + UCOL_CEBUF_SIZE; |  | 
| 6991 } |  | 
| 6992 |  | 
| 6993 static |  | 
| 6994 void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) { |  | 
| 6995     uint32_t  oldSize; |  | 
| 6996     uint32_t  newSize; |  | 
| 6997     uint32_t  *newBuf; |  | 
| 6998 |  | 
| 6999     ci->flags |= UCOL_ITER_ALLOCATED; |  | 
| 7000     oldSize = (uint32_t)(b->pos - b->buf); |  | 
| 7001     newSize = oldSize * 2; |  | 
| 7002     newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t)); |  | 
| 7003     if(newBuf == NULL) { |  | 
| 7004         *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 7005     } |  | 
| 7006     else { |  | 
| 7007         uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t)); |  | 
| 7008         if (b->buf != b->localArray) { |  | 
| 7009             uprv_free(b->buf); |  | 
| 7010         } |  | 
| 7011         b->buf = newBuf; |  | 
| 7012         b->endp = b->buf + newSize; |  | 
| 7013         b->pos  = b->buf + oldSize; |  | 
| 7014     } |  | 
| 7015 } |  | 
| 7016 |  | 
| 7017 static |  | 
| 7018 inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCo
      de *status) { |  | 
| 7019     if (b->pos == b->endp) { |  | 
| 7020         ucol_CEBuf_Expand(b, ci, status); |  | 
| 7021     } |  | 
| 7022     if (U_SUCCESS(*status)) { |  | 
| 7023         *(b)->pos++ = ce; |  | 
| 7024     } |  | 
| 7025 } |  | 
| 7026 |  | 
| 7027 /* This is a trick string compare function that goes in and uses sortkeys to com
      pare */ |  | 
| 7028 /* It is used when compare gets in trouble and needs to bail out                
           */ |  | 
| 7029 static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl, |  | 
| 7030                                                   collIterate *tColl, |  | 
| 7031                                                   UErrorCode *status) |  | 
| 7032 { |  | 
| 7033     uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER]; |  | 
| 7034     uint8_t *sourceKeyP = sourceKey; |  | 
| 7035     uint8_t *targetKeyP = targetKey; |  | 
| 7036     int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER; |  | 
| 7037     const UCollator *coll = sColl->coll; |  | 
| 7038     const UChar *source = NULL; |  | 
| 7039     const UChar *target = NULL; |  | 
| 7040     int32_t result = UCOL_EQUAL; |  | 
| 7041     UnicodeString sourceString, targetString; |  | 
| 7042     int32_t sourceLength; |  | 
| 7043     int32_t targetLength; |  | 
| 7044 |  | 
| 7045     if(sColl->flags & UCOL_USE_ITERATOR) { |  | 
| 7046         sColl->iterator->move(sColl->iterator, 0, UITER_START); |  | 
| 7047         tColl->iterator->move(tColl->iterator, 0, UITER_START); |  | 
| 7048         UChar32 c; |  | 
| 7049         while((c=sColl->iterator->next(sColl->iterator))>=0) { |  | 
| 7050             sourceString.append((UChar)c); |  | 
| 7051         } |  | 
| 7052         while((c=tColl->iterator->next(tColl->iterator))>=0) { |  | 
| 7053             targetString.append((UChar)c); |  | 
| 7054         } |  | 
| 7055         source = sourceString.getBuffer(); |  | 
| 7056         sourceLength = sourceString.length(); |  | 
| 7057         target = targetString.getBuffer(); |  | 
| 7058         targetLength = targetString.length(); |  | 
| 7059     } else { // no iterators |  | 
| 7060         sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sCo
      ll->string):-1; |  | 
| 7061         targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tCo
      ll->string):-1; |  | 
| 7062         source = sColl->string; |  | 
| 7063         target = tColl->string; |  | 
| 7064     } |  | 
| 7065 |  | 
| 7066 |  | 
| 7067 |  | 
| 7068     sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourc
      eKeyLen); |  | 
| 7069     if(sourceKeyLen > UCOL_MAX_BUFFER) { |  | 
| 7070         sourceKeyP = (uint8_t*)uprv_malloc(sourceKeyLen*sizeof(uint8_t)); |  | 
| 7071         if(sourceKeyP == NULL) { |  | 
| 7072             *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 7073             goto cleanup_and_do_compare; |  | 
| 7074         } |  | 
| 7075         sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, s
      ourceKeyLen); |  | 
| 7076     } |  | 
| 7077 |  | 
| 7078     targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targe
      tKeyLen); |  | 
| 7079     if(targetKeyLen > UCOL_MAX_BUFFER) { |  | 
| 7080         targetKeyP = (uint8_t*)uprv_malloc(targetKeyLen*sizeof(uint8_t)); |  | 
| 7081         if(targetKeyP == NULL) { |  | 
| 7082             *status = U_MEMORY_ALLOCATION_ERROR; |  | 
| 7083             goto cleanup_and_do_compare; |  | 
| 7084         } |  | 
| 7085         targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, t
      argetKeyLen); |  | 
| 7086     } |  | 
| 7087 |  | 
| 7088     result = uprv_strcmp((const char*)sourceKeyP, (const char*)targetKeyP); |  | 
| 7089 |  | 
| 7090 cleanup_and_do_compare: |  | 
| 7091     if(sourceKeyP != NULL && sourceKeyP != sourceKey) { |  | 
| 7092         uprv_free(sourceKeyP); |  | 
| 7093     } |  | 
| 7094 |  | 
| 7095     if(targetKeyP != NULL && targetKeyP != targetKey) { |  | 
| 7096         uprv_free(targetKeyP); |  | 
| 7097     } |  | 
| 7098 |  | 
| 7099     if(result<0) { |  | 
| 7100         return UCOL_LESS; |  | 
| 7101     } else if(result>0) { |  | 
| 7102         return UCOL_GREATER; |  | 
| 7103     } else { |  | 
| 7104         return UCOL_EQUAL; |  | 
| 7105     } |  | 
| 7106 } |  | 
| 7107 |  | 
| 7108 |  | 
| 7109 static UCollationResult |  | 
| 7110 ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status) |  | 
| 7111 { |  | 
| 7112     U_ALIGN_CODE(16); |  | 
| 7113 |  | 
| 7114     const UCollator *coll = sColl->coll; |  | 
| 7115 |  | 
| 7116 |  | 
| 7117     // setting up the collator parameters |  | 
| 7118     UColAttributeValue strength = coll->strength; |  | 
| 7119     UBool initialCheckSecTer = (strength  >= UCOL_SECONDARY); |  | 
| 7120 |  | 
| 7121     UBool checkSecTer = initialCheckSecTer; |  | 
| 7122     UBool checkTertiary = (strength  >= UCOL_TERTIARY); |  | 
| 7123     UBool checkQuad = (strength  >= UCOL_QUATERNARY); |  | 
| 7124     UBool checkIdent = (strength == UCOL_IDENTICAL); |  | 
| 7125     UBool checkCase = (coll->caseLevel == UCOL_ON); |  | 
| 7126     UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer; |  | 
| 7127     UBool shifted = (coll->alternateHandling == UCOL_SHIFTED); |  | 
| 7128     UBool qShifted = shifted && checkQuad; |  | 
| 7129     UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad; |  | 
| 7130 |  | 
| 7131     if(doHiragana && shifted) { |  | 
| 7132         return (ucol_compareUsingSortKeys(sColl, tColl, status)); |  | 
| 7133     } |  | 
| 7134     uint8_t caseSwitch = coll->caseSwitch; |  | 
| 7135     uint8_t tertiaryMask = coll->tertiaryMask; |  | 
| 7136 |  | 
| 7137     // This is the lowest primary value that will not be ignored if shifted |  | 
| 7138     uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0; |  | 
| 7139 |  | 
| 7140     UCollationResult result = UCOL_EQUAL; |  | 
| 7141     UCollationResult hirResult = UCOL_EQUAL; |  | 
| 7142 |  | 
| 7143     // Preparing the CE buffers. They will be filled during the primary phase |  | 
| 7144     ucol_CEBuf   sCEs; |  | 
| 7145     ucol_CEBuf   tCEs; |  | 
| 7146     UCOL_INIT_CEBUF(&sCEs); |  | 
| 7147     UCOL_INIT_CEBUF(&tCEs); |  | 
| 7148 |  | 
| 7149     uint32_t secS = 0, secT = 0; |  | 
| 7150     uint32_t sOrder=0, tOrder=0; |  | 
| 7151 |  | 
| 7152     // Non shifted primary processing is quite simple |  | 
| 7153     if(!shifted) { |  | 
| 7154         for(;;) { |  | 
| 7155             // We fetch CEs until we hit a non ignorable primary or end. |  | 
| 7156             uint32_t sPrimary; |  | 
| 7157             do { |  | 
| 7158                 // We get the next CE |  | 
| 7159                 sOrder = ucol_IGetNextCE(coll, sColl, status); |  | 
| 7160                 // Stuff it in the buffer |  | 
| 7161                 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7162                 // And keep just the primary part. |  | 
| 7163                 sPrimary = sOrder & UCOL_PRIMARYMASK; |  | 
| 7164             } while(sPrimary == 0); |  | 
| 7165 |  | 
| 7166             // see the comments on the above block |  | 
| 7167             uint32_t tPrimary; |  | 
| 7168             do { |  | 
| 7169                 tOrder = ucol_IGetNextCE(coll, tColl, status); |  | 
| 7170                 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7171                 tPrimary = tOrder & UCOL_PRIMARYMASK; |  | 
| 7172             } while(tPrimary == 0); |  | 
| 7173 |  | 
| 7174             // if both primaries are the same |  | 
| 7175             if(sPrimary == tPrimary) { |  | 
| 7176                 // and there are no more CEs, we advance to the next level |  | 
| 7177                 if(sPrimary == UCOL_NO_MORE_CES_PRIMARY) { |  | 
| 7178                     break; |  | 
| 7179                 } |  | 
| 7180                 if(doHiragana && hirResult == UCOL_EQUAL) { |  | 
| 7181                     if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCO
      L_WAS_HIRAGANA)) { |  | 
| 7182                         hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl
      ->flags & UCOL_WAS_HIRAGANA)) |  | 
| 7183                             ? UCOL_LESS:UCOL_GREATER; |  | 
| 7184                     } |  | 
| 7185                 } |  | 
| 7186             } else { |  | 
| 7187                 // only need to check one for continuation |  | 
| 7188                 // if one is then the other must be or the preceding CE would be
       a prefix of the other |  | 
| 7189                 if (coll->leadBytePermutationTable != NULL && !isContinuation(sO
      rder)) { |  | 
| 7190                     sPrimary = (coll->leadBytePermutationTable[sPrimary>>24] << 
      24) | (sPrimary & 0x00FFFFFF); |  | 
| 7191                     tPrimary = (coll->leadBytePermutationTable[tPrimary>>24] << 
      24) | (tPrimary & 0x00FFFFFF); |  | 
| 7192                 } |  | 
| 7193                 // if two primaries are different, we are done |  | 
| 7194                 result = (sPrimary < tPrimary) ?  UCOL_LESS: UCOL_GREATER; |  | 
| 7195                 goto commonReturn; |  | 
| 7196             } |  | 
| 7197         } // no primary difference... do the rest from the buffers |  | 
| 7198     } else { // shifted - do a slightly more complicated processing :) |  | 
| 7199         for(;;) { |  | 
| 7200             UBool sInShifted = FALSE; |  | 
| 7201             UBool tInShifted = FALSE; |  | 
| 7202             // This version of code can be refactored. However, it seems easier 
      to understand this way. |  | 
| 7203             // Source loop. Same as the target loop. |  | 
| 7204             for(;;) { |  | 
| 7205                 sOrder = ucol_IGetNextCE(coll, sColl, status); |  | 
| 7206                 if(sOrder == UCOL_NO_MORE_CES) { |  | 
| 7207                     UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7208                     break; |  | 
| 7209                 } else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMA
      SK) == 0)) { |  | 
| 7210                     /* UCA amendment - ignore ignorables that follow shifted cod
      e points */ |  | 
| 7211                     continue; |  | 
| 7212                 } else if(isContinuation(sOrder)) { |  | 
| 7213                     if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary va
      lue */ |  | 
| 7214                         if(sInShifted) { |  | 
| 7215                             sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* pres
      erve interesting continuation */ |  | 
| 7216                             UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7217                             continue; |  | 
| 7218                         } else { |  | 
| 7219                             UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7220                             break; |  | 
| 7221                         } |  | 
| 7222                     } else { /* Just lower level values */ |  | 
| 7223                         if(sInShifted) { |  | 
| 7224                             continue; |  | 
| 7225                         } else { |  | 
| 7226                             UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7227                             continue; |  | 
| 7228                         } |  | 
| 7229                     } |  | 
| 7230                 } else { /* regular */ |  | 
| 7231                     if(coll->leadBytePermutationTable != NULL){ |  | 
| 7232                         sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 
      24) | (sOrder & 0x00FFFFFF); |  | 
| 7233                     } |  | 
| 7234                     if((sOrder & UCOL_PRIMARYMASK) > LVT) { |  | 
| 7235                         UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7236                         break; |  | 
| 7237                     } else { |  | 
| 7238                         if((sOrder & UCOL_PRIMARYMASK) > 0) { |  | 
| 7239                             sInShifted = TRUE; |  | 
| 7240                             sOrder &= UCOL_PRIMARYMASK; |  | 
| 7241                             UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7242                             continue; |  | 
| 7243                         } else { |  | 
| 7244                             UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status); |  | 
| 7245                             sInShifted = FALSE; |  | 
| 7246                             continue; |  | 
| 7247                         } |  | 
| 7248                     } |  | 
| 7249                 } |  | 
| 7250             } |  | 
| 7251             sOrder &= UCOL_PRIMARYMASK; |  | 
| 7252             sInShifted = FALSE; |  | 
| 7253 |  | 
| 7254             for(;;) { |  | 
| 7255                 tOrder = ucol_IGetNextCE(coll, tColl, status); |  | 
| 7256                 if(tOrder == UCOL_NO_MORE_CES) { |  | 
| 7257                     UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7258                     break; |  | 
| 7259                 } else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMA
      SK) == 0)) { |  | 
| 7260                     /* UCA amendment - ignore ignorables that follow shifted cod
      e points */ |  | 
| 7261                     continue; |  | 
| 7262                 } else if(isContinuation(tOrder)) { |  | 
| 7263                     if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary va
      lue */ |  | 
| 7264                         if(tInShifted) { |  | 
| 7265                             tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* pres
      erve interesting continuation */ |  | 
| 7266                             UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7267                             continue; |  | 
| 7268                         } else { |  | 
| 7269                             UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7270                             break; |  | 
| 7271                         } |  | 
| 7272                     } else { /* Just lower level values */ |  | 
| 7273                         if(tInShifted) { |  | 
| 7274                             continue; |  | 
| 7275                         } else { |  | 
| 7276                             UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7277                             continue; |  | 
| 7278                         } |  | 
| 7279                     } |  | 
| 7280                 } else { /* regular */ |  | 
| 7281                     if(coll->leadBytePermutationTable != NULL){ |  | 
| 7282                         tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 
      24) | (tOrder & 0x00FFFFFF); |  | 
| 7283                     } |  | 
| 7284                     if((tOrder & UCOL_PRIMARYMASK) > LVT) { |  | 
| 7285                         UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7286                         break; |  | 
| 7287                     } else { |  | 
| 7288                         if((tOrder & UCOL_PRIMARYMASK) > 0) { |  | 
| 7289                             tInShifted = TRUE; |  | 
| 7290                             tOrder &= UCOL_PRIMARYMASK; |  | 
| 7291                             UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7292                             continue; |  | 
| 7293                         } else { |  | 
| 7294                             UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status); |  | 
| 7295                             tInShifted = FALSE; |  | 
| 7296                             continue; |  | 
| 7297                         } |  | 
| 7298                     } |  | 
| 7299                 } |  | 
| 7300             } |  | 
| 7301             tOrder &= UCOL_PRIMARYMASK; |  | 
| 7302             tInShifted = FALSE; |  | 
| 7303 |  | 
| 7304             if(sOrder == tOrder) { |  | 
| 7305                 /* |  | 
| 7306                 if(doHiragana && hirResult == UCOL_EQUAL) { |  | 
| 7307                 if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_
      HIRAGANA)) { |  | 
| 7308                 hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & 
      UCOL_WAS_HIRAGANA)) |  | 
| 7309                 ? UCOL_LESS:UCOL_GREATER; |  | 
| 7310                 } |  | 
| 7311                 } |  | 
| 7312                 */ |  | 
| 7313                 if(sOrder == UCOL_NO_MORE_CES_PRIMARY) { |  | 
| 7314                     break; |  | 
| 7315                 } else { |  | 
| 7316                     sOrder = 0; |  | 
| 7317                     tOrder = 0; |  | 
| 7318                     continue; |  | 
| 7319                 } |  | 
| 7320             } else { |  | 
| 7321                 result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER; |  | 
| 7322                 goto commonReturn; |  | 
| 7323             } |  | 
| 7324         } /* no primary difference... do the rest from the buffers */ |  | 
| 7325     } |  | 
| 7326 |  | 
| 7327     /* now, we're gonna reexamine collected CEs */ |  | 
| 7328     uint32_t    *sCE; |  | 
| 7329     uint32_t    *tCE; |  | 
| 7330 |  | 
| 7331     /* This is the secondary level of comparison */ |  | 
| 7332     if(checkSecTer) { |  | 
| 7333         if(!isFrenchSec) { /* normal */ |  | 
| 7334             sCE = sCEs.buf; |  | 
| 7335             tCE = tCEs.buf; |  | 
| 7336             for(;;) { |  | 
| 7337                 while (secS == 0) { |  | 
| 7338                     secS = *(sCE++) & UCOL_SECONDARYMASK; |  | 
| 7339                 } |  | 
| 7340 |  | 
| 7341                 while(secT == 0) { |  | 
| 7342                     secT = *(tCE++) & UCOL_SECONDARYMASK; |  | 
| 7343                 } |  | 
| 7344 |  | 
| 7345                 if(secS == secT) { |  | 
| 7346                     if(secS == UCOL_NO_MORE_CES_SECONDARY) { |  | 
| 7347                         break; |  | 
| 7348                     } else { |  | 
| 7349                         secS = 0; secT = 0; |  | 
| 7350                         continue; |  | 
| 7351                     } |  | 
| 7352                 } else { |  | 
| 7353                     result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; |  | 
| 7354                     goto commonReturn; |  | 
| 7355                 } |  | 
| 7356             } |  | 
| 7357         } else { /* do the French */ |  | 
| 7358             uint32_t *sCESave = NULL; |  | 
| 7359             uint32_t *tCESave = NULL; |  | 
| 7360             sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimi
      zed */ |  | 
| 7361             tCE = tCEs.pos-2; |  | 
| 7362             for(;;) { |  | 
| 7363                 while (secS == 0 && sCE >= sCEs.buf) { |  | 
| 7364                     if(sCESave == NULL) { |  | 
| 7365                         secS = *(sCE--); |  | 
| 7366                         if(isContinuation(secS)) { |  | 
| 7367                             while(isContinuation(secS = *(sCE--))) |  | 
| 7368                                 ; |  | 
| 7369                             /* after this, secS has the start of continuation, a
      nd sCEs points before that */ |  | 
| 7370                             sCESave = sCE; /* we save it, so that we know where 
      to come back AND that we need to go forward */ |  | 
| 7371                             sCE+=2;  /* need to point to the first continuation 
      CP */ |  | 
| 7372                             /* However, now you can just continue doing stuff */ |  | 
| 7373                         } |  | 
| 7374                     } else { |  | 
| 7375                         secS = *(sCE++); |  | 
| 7376                         if(!isContinuation(secS)) { /* This means we have finish
      ed with this cont */ |  | 
| 7377                             sCE = sCESave;            /* reset the pointer to be
      fore continuation */ |  | 
| 7378                             sCESave = NULL; |  | 
| 7379                             secS = 0;  /* Fetch a fresh CE before the continuati
      on sequence. */ |  | 
| 7380                             continue; |  | 
| 7381                         } |  | 
| 7382                     } |  | 
| 7383                     secS &= UCOL_SECONDARYMASK; /* remove the continuation bit *
      / |  | 
| 7384                 } |  | 
| 7385 |  | 
| 7386                 while(secT == 0 && tCE >= tCEs.buf) { |  | 
| 7387                     if(tCESave == NULL) { |  | 
| 7388                         secT = *(tCE--); |  | 
| 7389                         if(isContinuation(secT)) { |  | 
| 7390                             while(isContinuation(secT = *(tCE--))) |  | 
| 7391                                 ; |  | 
| 7392                             /* after this, secS has the start of continuation, a
      nd sCEs points before that */ |  | 
| 7393                             tCESave = tCE; /* we save it, so that we know where 
      to come back AND that we need to go forward */ |  | 
| 7394                             tCE+=2;  /* need to point to the first continuation 
      CP */ |  | 
| 7395                             /* However, now you can just continue doing stuff */ |  | 
| 7396                         } |  | 
| 7397                     } else { |  | 
| 7398                         secT = *(tCE++); |  | 
| 7399                         if(!isContinuation(secT)) { /* This means we have finish
      ed with this cont */ |  | 
| 7400                             tCE = tCESave;          /* reset the pointer to befo
      re continuation */ |  | 
| 7401                             tCESave = NULL; |  | 
| 7402                             secT = 0;  /* Fetch a fresh CE before the continuati
      on sequence. */ |  | 
| 7403                             continue; |  | 
| 7404                         } |  | 
| 7405                     } |  | 
| 7406                     secT &= UCOL_SECONDARYMASK; /* remove the continuation bit *
      / |  | 
| 7407                 } |  | 
| 7408 |  | 
| 7409                 if(secS == secT) { |  | 
| 7410                     if(secS == UCOL_NO_MORE_CES_SECONDARY || (sCE < sCEs.buf && 
      tCE < tCEs.buf)) { |  | 
| 7411                         break; |  | 
| 7412                     } else { |  | 
| 7413                         secS = 0; secT = 0; |  | 
| 7414                         continue; |  | 
| 7415                     } |  | 
| 7416                 } else { |  | 
| 7417                     result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; |  | 
| 7418                     goto commonReturn; |  | 
| 7419                 } |  | 
| 7420             } |  | 
| 7421         } |  | 
| 7422     } |  | 
| 7423 |  | 
| 7424     /* doing the case bit */ |  | 
| 7425     if(checkCase) { |  | 
| 7426         sCE = sCEs.buf; |  | 
| 7427         tCE = tCEs.buf; |  | 
| 7428         for(;;) { |  | 
| 7429             while((secS & UCOL_REMOVE_CASE) == 0) { |  | 
| 7430                 if(!isContinuation(*sCE++)) { |  | 
| 7431                     secS =*(sCE-1); |  | 
| 7432                     if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMA
      RY) { |  | 
| 7433                         // primary ignorables should not be considered on the ca
      se level when the strength is primary |  | 
| 7434                         // otherwise, the CEs stop being well-formed |  | 
| 7435                         secS &= UCOL_TERT_CASE_MASK; |  | 
| 7436                         secS ^= caseSwitch; |  | 
| 7437                     } else { |  | 
| 7438                         secS = 0; |  | 
| 7439                     } |  | 
| 7440                 } else { |  | 
| 7441                     secS = 0; |  | 
| 7442                 } |  | 
| 7443             } |  | 
| 7444 |  | 
| 7445             while((secT & UCOL_REMOVE_CASE) == 0) { |  | 
| 7446                 if(!isContinuation(*tCE++)) { |  | 
| 7447                     secT = *(tCE-1); |  | 
| 7448                     if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMA
      RY) { |  | 
| 7449                         // primary ignorables should not be considered on the ca
      se level when the strength is primary |  | 
| 7450                         // otherwise, the CEs stop being well-formed |  | 
| 7451                         secT &= UCOL_TERT_CASE_MASK; |  | 
| 7452                         secT ^= caseSwitch; |  | 
| 7453                     } else { |  | 
| 7454                         secT = 0; |  | 
| 7455                     } |  | 
| 7456                 } else { |  | 
| 7457                     secT = 0; |  | 
| 7458                 } |  | 
| 7459             } |  | 
| 7460 |  | 
| 7461             if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) { |  | 
| 7462                 result = UCOL_LESS; |  | 
| 7463                 goto commonReturn; |  | 
| 7464             } else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK))
       { |  | 
| 7465                 result = UCOL_GREATER; |  | 
| 7466                 goto commonReturn; |  | 
| 7467             } |  | 
| 7468 |  | 
| 7469             if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY || (secT &
       UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) { |  | 
| 7470                 break; |  | 
| 7471             } else { |  | 
| 7472                 secS = 0; |  | 
| 7473                 secT = 0; |  | 
| 7474             } |  | 
| 7475         } |  | 
| 7476     } |  | 
| 7477 |  | 
| 7478     /* Tertiary level */ |  | 
| 7479     if(checkTertiary) { |  | 
| 7480         secS = 0; |  | 
| 7481         secT = 0; |  | 
| 7482         sCE = sCEs.buf; |  | 
| 7483         tCE = tCEs.buf; |  | 
| 7484         for(;;) { |  | 
| 7485             while((secS & UCOL_REMOVE_CASE) == 0) { |  | 
| 7486                 sOrder = *sCE++; |  | 
| 7487                 secS = sOrder & tertiaryMask; |  | 
| 7488                 if(!isContinuation(sOrder)) { |  | 
| 7489                     secS ^= caseSwitch; |  | 
| 7490                 } else { |  | 
| 7491                     secS &= UCOL_REMOVE_CASE; |  | 
| 7492                 } |  | 
| 7493             } |  | 
| 7494 |  | 
| 7495             while((secT & UCOL_REMOVE_CASE)  == 0) { |  | 
| 7496                 tOrder = *tCE++; |  | 
| 7497                 secT = tOrder & tertiaryMask; |  | 
| 7498                 if(!isContinuation(tOrder)) { |  | 
| 7499                     secT ^= caseSwitch; |  | 
| 7500                 } else { |  | 
| 7501                     secT &= UCOL_REMOVE_CASE; |  | 
| 7502                 } |  | 
| 7503             } |  | 
| 7504 |  | 
| 7505             if(secS == secT) { |  | 
| 7506                 if((secS & UCOL_REMOVE_CASE) == 1) { |  | 
| 7507                     break; |  | 
| 7508                 } else { |  | 
| 7509                     secS = 0; secT = 0; |  | 
| 7510                     continue; |  | 
| 7511                 } |  | 
| 7512             } else { |  | 
| 7513                 result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; |  | 
| 7514                 goto commonReturn; |  | 
| 7515             } |  | 
| 7516         } |  | 
| 7517     } |  | 
| 7518 |  | 
| 7519 |  | 
| 7520     if(qShifted /*checkQuad*/) { |  | 
| 7521         UBool sInShifted = TRUE; |  | 
| 7522         UBool tInShifted = TRUE; |  | 
| 7523         secS = 0; |  | 
| 7524         secT = 0; |  | 
| 7525         sCE = sCEs.buf; |  | 
| 7526         tCE = tCEs.buf; |  | 
| 7527         for(;;) { |  | 
| 7528             while((secS == 0 && secS != UCOL_NO_MORE_CES) || (isContinuation(sec
      S) && !sInShifted)) { |  | 
| 7529                 secS = *(sCE++); |  | 
| 7530                 if(isContinuation(secS)) { |  | 
| 7531                     if(!sInShifted) { |  | 
| 7532                         continue; |  | 
| 7533                     } |  | 
| 7534                 } else if(secS > LVT || (secS & UCOL_PRIMARYMASK) == 0) { /* non
       continuation */ |  | 
| 7535                     secS = UCOL_PRIMARYMASK; |  | 
| 7536                     sInShifted = FALSE; |  | 
| 7537                 } else { |  | 
| 7538                     sInShifted = TRUE; |  | 
| 7539                 } |  | 
| 7540             } |  | 
| 7541             secS &= UCOL_PRIMARYMASK; |  | 
| 7542 |  | 
| 7543 |  | 
| 7544             while((secT == 0 && secT != UCOL_NO_MORE_CES) || (isContinuation(sec
      T) && !tInShifted)) { |  | 
| 7545                 secT = *(tCE++); |  | 
| 7546                 if(isContinuation(secT)) { |  | 
| 7547                     if(!tInShifted) { |  | 
| 7548                         continue; |  | 
| 7549                     } |  | 
| 7550                 } else if(secT > LVT || (secT & UCOL_PRIMARYMASK) == 0) { |  | 
| 7551                     secT = UCOL_PRIMARYMASK; |  | 
| 7552                     tInShifted = FALSE; |  | 
| 7553                 } else { |  | 
| 7554                     tInShifted = TRUE; |  | 
| 7555                 } |  | 
| 7556             } |  | 
| 7557             secT &= UCOL_PRIMARYMASK; |  | 
| 7558 |  | 
| 7559             if(secS == secT) { |  | 
| 7560                 if(secS == UCOL_NO_MORE_CES_PRIMARY) { |  | 
| 7561                     break; |  | 
| 7562                 } else { |  | 
| 7563                     secS = 0; secT = 0; |  | 
| 7564                     continue; |  | 
| 7565                 } |  | 
| 7566             } else { |  | 
| 7567                 result = (secS < secT) ? UCOL_LESS : UCOL_GREATER; |  | 
| 7568                 goto commonReturn; |  | 
| 7569             } |  | 
| 7570         } |  | 
| 7571     } else if(doHiragana && hirResult != UCOL_EQUAL) { |  | 
| 7572         // If we're fine on quaternaries, we might be different |  | 
| 7573         // on Hiragana. This, however, might fail us in shifted. |  | 
| 7574         result = hirResult; |  | 
| 7575         goto commonReturn; |  | 
| 7576     } |  | 
| 7577 |  | 
| 7578     /*  For IDENTICAL comparisons, we use a bitwise character comparison */ |  | 
| 7579     /*  as a tiebreaker if all else is equal.                                */ |  | 
| 7580     /*  Getting here  should be quite rare - strings are not identical -     */ |  | 
| 7581     /*     that is checked first, but compared == through all other checks.  */ |  | 
| 7582     if(checkIdent) |  | 
| 7583     { |  | 
| 7584         //result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UC
      OL_ON); |  | 
| 7585         result = ucol_checkIdent(sColl, tColl, TRUE, status); |  | 
| 7586     } |  | 
| 7587 |  | 
| 7588 commonReturn: |  | 
| 7589     if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) { |  | 
| 7590         if (sCEs.buf != sCEs.localArray ) { |  | 
| 7591             uprv_free(sCEs.buf); |  | 
| 7592         } |  | 
| 7593         if (tCEs.buf != tCEs.localArray ) { |  | 
| 7594             uprv_free(tCEs.buf); |  | 
| 7595         } |  | 
| 7596     } |  | 
| 7597 |  | 
| 7598     return result; |  | 
| 7599 } |  | 
| 7600 |  | 
| 7601 static UCollationResult |  | 
| 7602 ucol_strcollRegular(const UCollator *coll, |  | 
| 7603                     const UChar *source, int32_t sourceLength, |  | 
| 7604                     const UChar *target, int32_t targetLength, |  | 
| 7605                     UErrorCode *status) { |  | 
| 7606     collIterate sColl, tColl; |  | 
| 7607     // Preparing the context objects for iterating over strings |  | 
| 7608     IInit_collIterate(coll, source, sourceLength, &sColl, status); |  | 
| 7609     IInit_collIterate(coll, target, targetLength, &tColl, status); |  | 
| 7610     if(U_FAILURE(*status)) { |  | 
| 7611         return UCOL_LESS; |  | 
| 7612     } |  | 
| 7613     return ucol_strcollRegular(&sColl, &tColl, status); |  | 
| 7614 } |  | 
| 7615 |  | 
| 7616 static inline uint32_t |  | 
| 7617 ucol_getLatinOneContraction(const UCollator *coll, int32_t strength, |  | 
| 7618                           uint32_t CE, const UChar *s, int32_t *index, int32_t l
      en) |  | 
| 7619 { |  | 
| 7620     const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF); |  | 
| 7621     int32_t latinOneOffset = (CE & 0x00FFF000) >> 12; |  | 
| 7622     int32_t offset = 1; |  | 
| 7623     UChar schar = 0, tchar = 0; |  | 
| 7624 |  | 
| 7625     for(;;) { |  | 
| 7626         if(len == -1) { |  | 
| 7627             if(s[*index] == 0) { // end of string |  | 
| 7628                 return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOn
      eOffset]); |  | 
| 7629             } else { |  | 
| 7630                 schar = s[*index]; |  | 
| 7631             } |  | 
| 7632         } else { |  | 
| 7633             if(*index == len) { |  | 
| 7634                 return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOn
      eOffset]); |  | 
| 7635             } else { |  | 
| 7636                 schar = s[*index]; |  | 
| 7637             } |  | 
| 7638         } |  | 
| 7639 |  | 
| 7640         while(schar > (tchar = *(UCharOffset+offset))) { /* since the contractio
      n codepoints should be ordered, we skip all that are smaller */ |  | 
| 7641             offset++; |  | 
| 7642         } |  | 
| 7643 |  | 
| 7644         if (schar == tchar) { |  | 
| 7645             (*index)++; |  | 
| 7646             return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff
      set+offset]); |  | 
| 7647         } |  | 
| 7648         else |  | 
| 7649         { |  | 
| 7650             if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) { |  | 
| 7651                 return UCOL_BAIL_OUT_CE; |  | 
| 7652             } |  | 
| 7653             // skip completely ignorables |  | 
| 7654             uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar); |  | 
| 7655             if(isZeroCE == 0) { // we have to ignore completely ignorables |  | 
| 7656                 (*index)++; |  | 
| 7657                 continue; |  | 
| 7658             } |  | 
| 7659 |  | 
| 7660             return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff
      set]); |  | 
| 7661         } |  | 
| 7662     } |  | 
| 7663 } |  | 
| 7664 |  | 
| 7665 |  | 
| 7666 /** |  | 
| 7667  * This is a fast strcoll, geared towards text in Latin-1. |  | 
| 7668  * It supports contractions of size two, French secondaries |  | 
| 7669  * and case switching. You can use it with strengths primary |  | 
| 7670  * to tertiary. It does not support shifted and case level. |  | 
| 7671  * It relies on the table build by setupLatin1Table. If it |  | 
| 7672  * doesn't understand something, it will go to the regular |  | 
| 7673  * strcoll. |  | 
| 7674  */ |  | 
| 7675 static UCollationResult |  | 
| 7676 ucol_strcollUseLatin1( const UCollator    *coll, |  | 
| 7677               const UChar        *source, |  | 
| 7678               int32_t            sLen, |  | 
| 7679               const UChar        *target, |  | 
| 7680               int32_t            tLen, |  | 
| 7681               UErrorCode *status) |  | 
| 7682 { |  | 
| 7683     U_ALIGN_CODE(16); |  | 
| 7684     int32_t strength = coll->strength; |  | 
| 7685 |  | 
| 7686     int32_t sIndex = 0, tIndex = 0; |  | 
| 7687     UChar sChar = 0, tChar = 0; |  | 
| 7688     uint32_t sOrder=0, tOrder=0; |  | 
| 7689 |  | 
| 7690     UBool endOfSource = FALSE; |  | 
| 7691 |  | 
| 7692     uint32_t *elements = coll->latinOneCEs; |  | 
| 7693 |  | 
| 7694     UBool haveContractions = FALSE; // if we have contractions in our string |  | 
| 7695                                     // we cannot do French secondary |  | 
| 7696 |  | 
| 7697     // Do the primary level |  | 
| 7698     for(;;) { |  | 
| 7699         while(sOrder==0) { // this loop skips primary ignorables |  | 
| 7700             // sOrder=getNextlatinOneCE(source); |  | 
| 7701             if(sLen==-1) {   // handling zero terminated strings |  | 
| 7702                 sChar=source[sIndex++]; |  | 
| 7703                 if(sChar==0) { |  | 
| 7704                     endOfSource = TRUE; |  | 
| 7705                     break; |  | 
| 7706                 } |  | 
| 7707             } else {        // handling strings with known length |  | 
| 7708                 if(sIndex==sLen) { |  | 
| 7709                     endOfSource = TRUE; |  | 
| 7710                     break; |  | 
| 7711                 } |  | 
| 7712                 sChar=source[sIndex++]; |  | 
| 7713             } |  | 
| 7714             if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sCha
      r > 0xFF, but this is faster on win32) |  | 
| 7715                 //fprintf(stderr, "R"); |  | 
| 7716                 return ucol_strcollRegular(coll, source, sLen, target, tLen, sta
      tus); |  | 
| 7717             } |  | 
| 7718             sOrder = elements[sChar]; |  | 
| 7719             if(sOrder >= UCOL_NOT_FOUND) { // if we got a special |  | 
| 7720                 // specials can basically be either contractions or bail-out sig
      ns. If we get anything |  | 
| 7721                 // else, we'll bail out anywasy |  | 
| 7722                 if(getCETag(sOrder) == CONTRACTION_TAG) { |  | 
| 7723                     sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOr
      der, source, &sIndex, sLen); |  | 
| 7724                     haveContractions = TRUE; // if there are contractions, we ca
      nnot do French secondary |  | 
| 7725                     // However, if there are contractions in the table, but we a
      lways use just one char, |  | 
| 7726                     // we might be able to do French. This should be checked out
      . |  | 
| 7727                 } |  | 
| 7728                 if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { |  | 
| 7729                     //fprintf(stderr, "S"); |  | 
| 7730                     return ucol_strcollRegular(coll, source, sLen, target, tLen,
       status); |  | 
| 7731                 } |  | 
| 7732             } |  | 
| 7733         } |  | 
| 7734 |  | 
| 7735         while(tOrder==0) {  // this loop skips primary ignorables |  | 
| 7736             // tOrder=getNextlatinOneCE(target); |  | 
| 7737             if(tLen==-1) {    // handling zero terminated strings |  | 
| 7738                 tChar=target[tIndex++]; |  | 
| 7739                 if(tChar==0) { |  | 
| 7740                     if(endOfSource) { // this is different than source loop, |  | 
| 7741                         // as we already know that source loop is done here, |  | 
| 7742                         // so we can either finish the primary loop if both |  | 
| 7743                         // strings are done or anounce the result if only |  | 
| 7744                         // target is done. Same below. |  | 
| 7745                         goto endOfPrimLoop; |  | 
| 7746                     } else { |  | 
| 7747                         return UCOL_GREATER; |  | 
| 7748                     } |  | 
| 7749                 } |  | 
| 7750             } else {          // handling strings with known length |  | 
| 7751                 if(tIndex==tLen) { |  | 
| 7752                     if(endOfSource) { |  | 
| 7753                         goto endOfPrimLoop; |  | 
| 7754                     } else { |  | 
| 7755                         return UCOL_GREATER; |  | 
| 7756                     } |  | 
| 7757                 } |  | 
| 7758                 tChar=target[tIndex++]; |  | 
| 7759             } |  | 
| 7760             if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sCha
      r > 0xFF, but this is faster on win32) |  | 
| 7761                 //fprintf(stderr, "R"); |  | 
| 7762                 return ucol_strcollRegular(coll, source, sLen, target, tLen, sta
      tus); |  | 
| 7763             } |  | 
| 7764             tOrder = elements[tChar]; |  | 
| 7765             if(tOrder >= UCOL_NOT_FOUND) { |  | 
| 7766                 // Handling specials, see the comments for source |  | 
| 7767                 if(getCETag(tOrder) == CONTRACTION_TAG) { |  | 
| 7768                     tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOr
      der, target, &tIndex, tLen); |  | 
| 7769                     haveContractions = TRUE; |  | 
| 7770                 } |  | 
| 7771                 if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { |  | 
| 7772                     //fprintf(stderr, "S"); |  | 
| 7773                     return ucol_strcollRegular(coll, source, sLen, target, tLen,
       status); |  | 
| 7774                 } |  | 
| 7775             } |  | 
| 7776         } |  | 
| 7777         if(endOfSource) { // source is finished, but target is not, say the resu
      lt. |  | 
| 7778             return UCOL_LESS; |  | 
| 7779         } |  | 
| 7780 |  | 
| 7781         if(sOrder == tOrder) { // if we have same CEs, we continue the loop |  | 
| 7782             sOrder = 0; tOrder = 0; |  | 
| 7783             continue; |  | 
| 7784         } else { |  | 
| 7785             // compare current top bytes |  | 
| 7786             if(((sOrder^tOrder)&0xFF000000)!=0) { |  | 
| 7787                 // top bytes differ, return difference |  | 
| 7788                 if(sOrder < tOrder) { |  | 
| 7789                     return UCOL_LESS; |  | 
| 7790                 } else if(sOrder > tOrder) { |  | 
| 7791                     return UCOL_GREATER; |  | 
| 7792                 } |  | 
| 7793                 // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24)
      ; |  | 
| 7794                 // since we must return enum value |  | 
| 7795             } |  | 
| 7796 |  | 
| 7797             // top bytes match, continue with following bytes |  | 
| 7798             sOrder<<=8; |  | 
| 7799             tOrder<<=8; |  | 
| 7800         } |  | 
| 7801     } |  | 
| 7802 |  | 
| 7803 endOfPrimLoop: |  | 
| 7804     // after primary loop, we definitely know the sizes of strings, |  | 
| 7805     // so we set it and use simpler loop for secondaries and tertiaries |  | 
| 7806     sLen = sIndex; tLen = tIndex; |  | 
| 7807     if(strength >= UCOL_SECONDARY) { |  | 
| 7808         // adjust the table beggining |  | 
| 7809         elements += coll->latinOneTableLen; |  | 
| 7810         endOfSource = FALSE; |  | 
| 7811 |  | 
| 7812         if(coll->frenchCollation == UCOL_OFF) { // non French |  | 
| 7813             // This loop is a simplified copy of primary loop |  | 
| 7814             // at this point we know that whole strings are latin-1, so we don't |  | 
| 7815             // check for that. We also know that we only have contractions as |  | 
| 7816             // specials. |  | 
| 7817             sIndex = 0; tIndex = 0; |  | 
| 7818             for(;;) { |  | 
| 7819                 while(sOrder==0) { |  | 
| 7820                     if(sIndex==sLen) { |  | 
| 7821                         endOfSource = TRUE; |  | 
| 7822                         break; |  | 
| 7823                     } |  | 
| 7824                     sChar=source[sIndex++]; |  | 
| 7825                     sOrder = elements[sChar]; |  | 
| 7826                     if(sOrder > UCOL_NOT_FOUND) { |  | 
| 7827                         sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDAR
      Y, sOrder, source, &sIndex, sLen); |  | 
| 7828                     } |  | 
| 7829                 } |  | 
| 7830 |  | 
| 7831                 while(tOrder==0) { |  | 
| 7832                     if(tIndex==tLen) { |  | 
| 7833                         if(endOfSource) { |  | 
| 7834                             goto endOfSecLoop; |  | 
| 7835                         } else { |  | 
| 7836                             return UCOL_GREATER; |  | 
| 7837                         } |  | 
| 7838                     } |  | 
| 7839                     tChar=target[tIndex++]; |  | 
| 7840                     tOrder = elements[tChar]; |  | 
| 7841                     if(tOrder > UCOL_NOT_FOUND) { |  | 
| 7842                         tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDAR
      Y, tOrder, target, &tIndex, tLen); |  | 
| 7843                     } |  | 
| 7844                 } |  | 
| 7845                 if(endOfSource) { |  | 
| 7846                     return UCOL_LESS; |  | 
| 7847                 } |  | 
| 7848 |  | 
| 7849                 if(sOrder == tOrder) { |  | 
| 7850                     sOrder = 0; tOrder = 0; |  | 
| 7851                     continue; |  | 
| 7852                 } else { |  | 
| 7853                     // see primary loop for comments on this |  | 
| 7854                     if(((sOrder^tOrder)&0xFF000000)!=0) { |  | 
| 7855                         if(sOrder < tOrder) { |  | 
| 7856                             return UCOL_LESS; |  | 
| 7857                         } else if(sOrder > tOrder) { |  | 
| 7858                             return UCOL_GREATER; |  | 
| 7859                         } |  | 
| 7860                     } |  | 
| 7861                     sOrder<<=8; |  | 
| 7862                     tOrder<<=8; |  | 
| 7863                 } |  | 
| 7864             } |  | 
| 7865         } else { // French |  | 
| 7866             if(haveContractions) { // if we have contractions, we have to bail o
      ut |  | 
| 7867                 // since we don't really know how to handle them here |  | 
| 7868                 return ucol_strcollRegular(coll, source, sLen, target, tLen, sta
      tus); |  | 
| 7869             } |  | 
| 7870             // For French, we go backwards |  | 
| 7871             sIndex = sLen; tIndex = tLen; |  | 
| 7872             for(;;) { |  | 
| 7873                 while(sOrder==0) { |  | 
| 7874                     if(sIndex==0) { |  | 
| 7875                         endOfSource = TRUE; |  | 
| 7876                         break; |  | 
| 7877                     } |  | 
| 7878                     sChar=source[--sIndex]; |  | 
| 7879                     sOrder = elements[sChar]; |  | 
| 7880                     // don't even look for contractions |  | 
| 7881                 } |  | 
| 7882 |  | 
| 7883                 while(tOrder==0) { |  | 
| 7884                     if(tIndex==0) { |  | 
| 7885                         if(endOfSource) { |  | 
| 7886                             goto endOfSecLoop; |  | 
| 7887                         } else { |  | 
| 7888                             return UCOL_GREATER; |  | 
| 7889                         } |  | 
| 7890                     } |  | 
| 7891                     tChar=target[--tIndex]; |  | 
| 7892                     tOrder = elements[tChar]; |  | 
| 7893                     // don't even look for contractions |  | 
| 7894                 } |  | 
| 7895                 if(endOfSource) { |  | 
| 7896                     return UCOL_LESS; |  | 
| 7897                 } |  | 
| 7898 |  | 
| 7899                 if(sOrder == tOrder) { |  | 
| 7900                     sOrder = 0; tOrder = 0; |  | 
| 7901                     continue; |  | 
| 7902                 } else { |  | 
| 7903                     // see the primary loop for comments |  | 
| 7904                     if(((sOrder^tOrder)&0xFF000000)!=0) { |  | 
| 7905                         if(sOrder < tOrder) { |  | 
| 7906                             return UCOL_LESS; |  | 
| 7907                         } else if(sOrder > tOrder) { |  | 
| 7908                             return UCOL_GREATER; |  | 
| 7909                         } |  | 
| 7910                     } |  | 
| 7911                     sOrder<<=8; |  | 
| 7912                     tOrder<<=8; |  | 
| 7913                 } |  | 
| 7914             } |  | 
| 7915         } |  | 
| 7916     } |  | 
| 7917 |  | 
| 7918 endOfSecLoop: |  | 
| 7919     if(strength >= UCOL_TERTIARY) { |  | 
| 7920         // tertiary loop is the same as secondary (except no French) |  | 
| 7921         elements += coll->latinOneTableLen; |  | 
| 7922         sIndex = 0; tIndex = 0; |  | 
| 7923         endOfSource = FALSE; |  | 
| 7924         for(;;) { |  | 
| 7925             while(sOrder==0) { |  | 
| 7926                 if(sIndex==sLen) { |  | 
| 7927                     endOfSource = TRUE; |  | 
| 7928                     break; |  | 
| 7929                 } |  | 
| 7930                 sChar=source[sIndex++]; |  | 
| 7931                 sOrder = elements[sChar]; |  | 
| 7932                 if(sOrder > UCOL_NOT_FOUND) { |  | 
| 7933                     sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sO
      rder, source, &sIndex, sLen); |  | 
| 7934                 } |  | 
| 7935             } |  | 
| 7936             while(tOrder==0) { |  | 
| 7937                 if(tIndex==tLen) { |  | 
| 7938                     if(endOfSource) { |  | 
| 7939                         return UCOL_EQUAL; // if both strings are at the end, th
      ey are equal |  | 
| 7940                     } else { |  | 
| 7941                         return UCOL_GREATER; |  | 
| 7942                     } |  | 
| 7943                 } |  | 
| 7944                 tChar=target[tIndex++]; |  | 
| 7945                 tOrder = elements[tChar]; |  | 
| 7946                 if(tOrder > UCOL_NOT_FOUND) { |  | 
| 7947                     tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tO
      rder, target, &tIndex, tLen); |  | 
| 7948                 } |  | 
| 7949             } |  | 
| 7950             if(endOfSource) { |  | 
| 7951                 return UCOL_LESS; |  | 
| 7952             } |  | 
| 7953             if(sOrder == tOrder) { |  | 
| 7954                 sOrder = 0; tOrder = 0; |  | 
| 7955                 continue; |  | 
| 7956             } else { |  | 
| 7957                 if(((sOrder^tOrder)&0xff000000)!=0) { |  | 
| 7958                     if(sOrder < tOrder) { |  | 
| 7959                         return UCOL_LESS; |  | 
| 7960                     } else if(sOrder > tOrder) { |  | 
| 7961                         return UCOL_GREATER; |  | 
| 7962                     } |  | 
| 7963                 } |  | 
| 7964                 sOrder<<=8; |  | 
| 7965                 tOrder<<=8; |  | 
| 7966             } |  | 
| 7967         } |  | 
| 7968     } |  | 
| 7969     return UCOL_EQUAL; |  | 
| 7970 } |  | 
| 7971 |  | 
| 7972 /* |  | 
| 7973   Note: ucol_strcollUTF8 supports null terminated input. Calculating length of |  | 
| 7974   null terminated input string takes extra amount of CPU cycles. |  | 
| 7975 */ |  | 
| 7976 static UCollationResult |  | 
| 7977 ucol_strcollRegularUTF8( |  | 
| 7978                     const UCollator *coll, |  | 
| 7979                     const char      *source, |  | 
| 7980                     int32_t         sourceLength, |  | 
| 7981                     const char      *target, |  | 
| 7982                     int32_t         targetLength, |  | 
| 7983                     UErrorCode      *status) |  | 
| 7984 { |  | 
| 7985     UCharIterator src; |  | 
| 7986     UCharIterator tgt; |  | 
| 7987 |  | 
| 7988     uiter_setUTF8(&src, source, sourceLength); |  | 
| 7989     uiter_setUTF8(&tgt, target, targetLength); |  | 
| 7990 |  | 
| 7991     // Preparing the context objects for iterating over strings |  | 
| 7992     collIterate sColl, tColl; |  | 
| 7993     IInit_collIterate(coll, NULL, -1, &sColl, status); |  | 
| 7994     IInit_collIterate(coll, NULL, -1, &tColl, status); |  | 
| 7995     if(U_FAILURE(*status)) { |  | 
| 7996         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) |  | 
| 7997         return UCOL_EQUAL; |  | 
| 7998     } |  | 
| 7999     // The division for the array length may truncate the array size to |  | 
| 8000     // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high |  | 
| 8001     // for all platforms anyway. |  | 
| 8002     UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; |  | 
| 8003     UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; |  | 
| 8004     UNormIterator *sNormIter = NULL, *tNormIter = NULL; |  | 
| 8005 |  | 
| 8006     sColl.iterator = &src; |  | 
| 8007     sColl.flags |= UCOL_USE_ITERATOR; |  | 
| 8008     tColl.flags |= UCOL_USE_ITERATOR; |  | 
| 8009     tColl.iterator = &tgt; |  | 
| 8010 |  | 
| 8011     if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) { |  | 
| 8012         sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), statu
      s); |  | 
| 8013         sColl.iterator = unorm_setIter(sNormIter, &src, UNORM_FCD, status); |  | 
| 8014         sColl.flags &= ~UCOL_ITER_NORM; |  | 
| 8015 |  | 
| 8016         tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), statu
      s); |  | 
| 8017         tColl.iterator = unorm_setIter(tNormIter, &tgt, UNORM_FCD, status); |  | 
| 8018         tColl.flags &= ~UCOL_ITER_NORM; |  | 
| 8019     } |  | 
| 8020 |  | 
| 8021     return ucol_strcollRegular(&sColl, &tColl, status); |  | 
| 8022 } |  | 
| 8023 |  | 
| 8024 static inline uint32_t |  | 
| 8025 ucol_getLatinOneContractionUTF8(const UCollator *coll, int32_t strength, |  | 
| 8026                           uint32_t CE, const char *s, int32_t *index, int32_t le
      n) |  | 
| 8027 { |  | 
| 8028     const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF); |  | 
| 8029     int32_t latinOneOffset = (CE & 0x00FFF000) >> 12; |  | 
| 8030     int32_t offset = 1; |  | 
| 8031     UChar32 schar = 0, tchar = 0; |  | 
| 8032 |  | 
| 8033     for(;;) { |  | 
| 8034         if (*index == len) { |  | 
| 8035             return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff
      set]); |  | 
| 8036         } |  | 
| 8037         U8_GET_OR_FFFD((const uint8_t*)s, 0, *index, len, schar); |  | 
| 8038         if (len < 0 && schar == 0) { |  | 
| 8039             return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff
      set]); |  | 
| 8040         } |  | 
| 8041 |  | 
| 8042         while(schar > (tchar = *(UCharOffset+offset))) { /* since the contractio
      n codepoints should be ordered, we skip all that are smaller */ |  | 
| 8043             offset++; |  | 
| 8044         } |  | 
| 8045 |  | 
| 8046         if (schar == tchar) { |  | 
| 8047             U8_FWD_1(s, *index, len); |  | 
| 8048             return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff
      set+offset]); |  | 
| 8049         } |  | 
| 8050         else |  | 
| 8051         { |  | 
| 8052             if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) { |  | 
| 8053                 return UCOL_BAIL_OUT_CE; |  | 
| 8054             } |  | 
| 8055             // skip completely ignorables |  | 
| 8056             uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar); |  | 
| 8057             if(isZeroCE == 0) { // we have to ignore completely ignorables |  | 
| 8058                 U8_FWD_1(s, *index, len); |  | 
| 8059                 continue; |  | 
| 8060             } |  | 
| 8061 |  | 
| 8062             return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff
      set]); |  | 
| 8063         } |  | 
| 8064     } |  | 
| 8065 } |  | 
| 8066 |  | 
| 8067 static inline UCollationResult |  | 
| 8068 ucol_strcollUseLatin1UTF8( |  | 
| 8069                 const UCollator *coll, |  | 
| 8070                 const char      *source, |  | 
| 8071                 int32_t         sLen, |  | 
| 8072                 const char      *target, |  | 
| 8073                 int32_t         tLen, |  | 
| 8074                 UErrorCode      *status) |  | 
| 8075 { |  | 
| 8076     U_ALIGN_CODE(16); |  | 
| 8077     int32_t strength = coll->strength; |  | 
| 8078 |  | 
| 8079     int32_t sIndex = 0, tIndex = 0; |  | 
| 8080     UChar32 sChar = 0, tChar = 0; |  | 
| 8081     uint32_t sOrder=0, tOrder=0; |  | 
| 8082 |  | 
| 8083     UBool endOfSource = FALSE; |  | 
| 8084 |  | 
| 8085     uint32_t *elements = coll->latinOneCEs; |  | 
| 8086 |  | 
| 8087     UBool haveContractions = FALSE; // if we have contractions in our string |  | 
| 8088                                     // we cannot do French secondary |  | 
| 8089 |  | 
| 8090     // Do the primary level |  | 
| 8091     for(;;) { |  | 
| 8092         while(sOrder==0) { // this loop skips primary ignorables |  | 
| 8093             // sOrder=getNextlatinOneCE(source); |  | 
| 8094             if (sIndex == sLen) { |  | 
| 8095                 endOfSource = TRUE; |  | 
| 8096                 break; |  | 
| 8097             } |  | 
| 8098             U8_NEXT_OR_FFFD(source, sIndex, sLen ,sChar); |  | 
| 8099             if (sLen < 0 && sChar == 0) { |  | 
| 8100                 endOfSource = TRUE; |  | 
| 8101                 sLen = sIndex; |  | 
| 8102                 break; |  | 
| 8103             } |  | 
| 8104             if(sChar&0xFFFFFF00) { // if we encounter non-latin-1, we bail out (
      sChar > 0xFF, but this is faster on win32) |  | 
| 8105                 //fprintf(stderr, "R"); |  | 
| 8106                 return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen,
       status); |  | 
| 8107             } |  | 
| 8108             sOrder = elements[sChar]; |  | 
| 8109             if(sOrder >= UCOL_NOT_FOUND) { // if we got a special |  | 
| 8110                 // specials can basically be either contractions or bail-out sig
      ns. If we get anything |  | 
| 8111                 // else, we'll bail out anywasy |  | 
| 8112                 if(getCETag(sOrder) == CONTRACTION_TAG) { |  | 
| 8113                     sOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_PRIMARY,
       sOrder, source, &sIndex, sLen); |  | 
| 8114                     haveContractions = TRUE; // if there are contractions, we ca
      nnot do French secondary |  | 
| 8115                     // However, if there are contractions in the table, but we a
      lways use just one char, |  | 
| 8116                     // we might be able to do French. This should be checked out
      . |  | 
| 8117                 } |  | 
| 8118                 if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { |  | 
| 8119                     //fprintf(stderr, "S"); |  | 
| 8120                     return ucol_strcollRegularUTF8(coll, source, sLen, target, t
      Len, status); |  | 
| 8121                 } |  | 
| 8122             } |  | 
| 8123         } |  | 
| 8124 |  | 
| 8125         while(tOrder==0) {  // this loop skips primary ignorables |  | 
| 8126             // tOrder=getNextlatinOneCE(target); |  | 
| 8127             if (tIndex == tLen) { |  | 
| 8128                 if(endOfSource) { |  | 
| 8129                     goto endOfPrimLoopU8; |  | 
| 8130                 } else { |  | 
| 8131                     return UCOL_GREATER; |  | 
| 8132                 } |  | 
| 8133             } |  | 
| 8134             U8_NEXT_OR_FFFD(target, tIndex, tLen, tChar); |  | 
| 8135             if (tLen < 0 && tChar == 0) { |  | 
| 8136                 if(endOfSource) { |  | 
| 8137                     tLen = tIndex; |  | 
| 8138                     goto endOfPrimLoopU8; |  | 
| 8139                 } else { |  | 
| 8140                     return UCOL_GREATER; |  | 
| 8141                 } |  | 
| 8142             } |  | 
| 8143             if(tChar&0xFFFFFF00) { // if we encounter non-latin-1, we bail out (
      sChar > 0xFF, but this is faster on win32) |  | 
| 8144                 //fprintf(stderr, "R"); |  | 
| 8145                 return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen,
       status); |  | 
| 8146             } |  | 
| 8147             tOrder = elements[tChar]; |  | 
| 8148             if(tOrder >= UCOL_NOT_FOUND) { |  | 
| 8149                 // Handling specials, see the comments for source |  | 
| 8150                 if(getCETag(tOrder) == CONTRACTION_TAG) { |  | 
| 8151                     tOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_PRIMARY,
       tOrder, target, &tIndex, tLen); |  | 
| 8152                     haveContractions = TRUE; |  | 
| 8153                 } |  | 
| 8154                 if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) { |  | 
| 8155                     //fprintf(stderr, "S"); |  | 
| 8156                     return ucol_strcollRegularUTF8(coll, source, sLen, target, t
      Len, status); |  | 
| 8157                 } |  | 
| 8158             } |  | 
| 8159         } |  | 
| 8160         if(endOfSource) { // source is finished, but target is not, say the resu
      lt. |  | 
| 8161             return UCOL_LESS; |  | 
| 8162         } |  | 
| 8163 |  | 
| 8164         if(sOrder == tOrder) { // if we have same CEs, we continue the loop |  | 
| 8165             sOrder = 0; tOrder = 0; |  | 
| 8166             continue; |  | 
| 8167         } else { |  | 
| 8168             // compare current top bytes |  | 
| 8169             if(((sOrder^tOrder)&0xFF000000)!=0) { |  | 
| 8170                 // top bytes differ, return difference |  | 
| 8171                 if(sOrder < tOrder) { |  | 
| 8172                     return UCOL_LESS; |  | 
| 8173                 } else if(sOrder > tOrder) { |  | 
| 8174                     return UCOL_GREATER; |  | 
| 8175                 } |  | 
| 8176                 // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24)
      ; |  | 
| 8177                 // since we must return enum value |  | 
| 8178             } |  | 
| 8179 |  | 
| 8180             // top bytes match, continue with following bytes |  | 
| 8181             sOrder<<=8; |  | 
| 8182             tOrder<<=8; |  | 
| 8183         } |  | 
| 8184     } |  | 
| 8185 |  | 
| 8186 endOfPrimLoopU8: |  | 
| 8187     // after primary loop, we definitely know the sizes of strings, |  | 
| 8188     // so we set it and use simpler loop for secondaries and tertiaries |  | 
| 8189     sLen = sIndex; tLen = tIndex; |  | 
| 8190     if(strength >= UCOL_SECONDARY) { |  | 
| 8191         // adjust the table beggining |  | 
| 8192         elements += coll->latinOneTableLen; |  | 
| 8193         endOfSource = FALSE; |  | 
| 8194 |  | 
| 8195         if(coll->frenchCollation == UCOL_OFF) { // non French |  | 
| 8196             // This loop is a simplified copy of primary loop |  | 
| 8197             // at this point we know that whole strings are latin-1, so we don't |  | 
| 8198             // check for that. We also know that we only have contractions as |  | 
| 8199             // specials. |  | 
| 8200             sIndex = 0; tIndex = 0; |  | 
| 8201             for(;;) { |  | 
| 8202                 while(sOrder==0) { |  | 
| 8203                     if(sIndex==sLen) { |  | 
| 8204                         endOfSource = TRUE; |  | 
| 8205                         break; |  | 
| 8206                     } |  | 
| 8207                     U_ASSERT(sLen >= 0); |  | 
| 8208                     U8_NEXT_OR_FFFD(source, sIndex, sLen, sChar); |  | 
| 8209                     U_ASSERT(sChar >= 0 && sChar <= 0xFF); |  | 
| 8210                     sOrder = elements[sChar]; |  | 
| 8211                     if(sOrder > UCOL_NOT_FOUND) { |  | 
| 8212                         sOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_SECO
      NDARY, sOrder, source, &sIndex, sLen); |  | 
| 8213                     } |  | 
| 8214                 } |  | 
| 8215 |  | 
| 8216                 while(tOrder==0) { |  | 
| 8217                     if(tIndex==tLen) { |  | 
| 8218                         if(endOfSource) { |  | 
| 8219                             goto endOfSecLoopU8; |  | 
| 8220                         } else { |  | 
| 8221                             return UCOL_GREATER; |  | 
| 8222                         } |  | 
| 8223                     } |  | 
| 8224                     U_ASSERT(tLen >= 0); |  | 
| 8225                     U8_NEXT_OR_FFFD(target, tIndex, tLen, tChar); |  | 
| 8226                     U_ASSERT(tChar >= 0 && tChar <= 0xFF); |  | 
| 8227                     tOrder = elements[tChar]; |  | 
| 8228                     if(tOrder > UCOL_NOT_FOUND) { |  | 
| 8229                         tOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_SECO
      NDARY, tOrder, target, &tIndex, tLen); |  | 
| 8230                     } |  | 
| 8231                 } |  | 
| 8232                 if(endOfSource) { |  | 
| 8233                     return UCOL_LESS; |  | 
| 8234                 } |  | 
| 8235 |  | 
| 8236                 if(sOrder == tOrder) { |  | 
| 8237                     sOrder = 0; tOrder = 0; |  | 
| 8238                     continue; |  | 
| 8239                 } else { |  | 
| 8240                     // see primary loop for comments on this |  | 
| 8241                     if(((sOrder^tOrder)&0xFF000000)!=0) { |  | 
| 8242                         if(sOrder < tOrder) { |  | 
| 8243                             return UCOL_LESS; |  | 
| 8244                         } else if(sOrder > tOrder) { |  | 
| 8245                             return UCOL_GREATER; |  | 
| 8246                         } |  | 
| 8247                     } |  | 
| 8248                     sOrder<<=8; |  | 
| 8249                     tOrder<<=8; |  | 
| 8250                 } |  | 
| 8251             } |  | 
| 8252         } else { // French |  | 
| 8253             if(haveContractions) { // if we have contractions, we have to bail o
      ut |  | 
| 8254                 // since we don't really know how to handle them here |  | 
| 8255                 return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen,
       status); |  | 
| 8256             } |  | 
| 8257             // For French, we go backwards |  | 
| 8258             sIndex = sLen; tIndex = tLen; |  | 
| 8259             for(;;) { |  | 
| 8260                 while(sOrder==0) { |  | 
| 8261                     if(sIndex==0) { |  | 
| 8262                         endOfSource = TRUE; |  | 
| 8263                         break; |  | 
| 8264                     } |  | 
| 8265                     U8_PREV_OR_FFFD(source, 0, sIndex, sChar); |  | 
| 8266                     U_ASSERT(sChar >= 0 && sChar <= 0xFF); |  | 
| 8267                     sOrder = elements[sChar]; |  | 
| 8268                     // don't even look for contractions |  | 
| 8269                 } |  | 
| 8270 |  | 
| 8271                 while(tOrder==0) { |  | 
| 8272                     if(tIndex==0) { |  | 
| 8273                         if(endOfSource) { |  | 
| 8274                             goto endOfSecLoopU8; |  | 
| 8275                         } else { |  | 
| 8276                             return UCOL_GREATER; |  | 
| 8277                         } |  | 
| 8278                     } |  | 
| 8279                     U8_PREV_OR_FFFD(target, 0, tIndex, tChar); |  | 
| 8280                     U_ASSERT(tChar >= 0 && tChar <= 0xFF); |  | 
| 8281                     tOrder = elements[tChar]; |  | 
| 8282                     // don't even look for contractions |  | 
| 8283                 } |  | 
| 8284                 if(endOfSource) { |  | 
| 8285                     return UCOL_LESS; |  | 
| 8286                 } |  | 
| 8287 |  | 
| 8288                 if(sOrder == tOrder) { |  | 
| 8289                     sOrder = 0; tOrder = 0; |  | 
| 8290                     continue; |  | 
| 8291                 } else { |  | 
| 8292                     // see the primary loop for comments |  | 
| 8293                     if(((sOrder^tOrder)&0xFF000000)!=0) { |  | 
| 8294                         if(sOrder < tOrder) { |  | 
| 8295                             return UCOL_LESS; |  | 
| 8296                         } else if(sOrder > tOrder) { |  | 
| 8297                             return UCOL_GREATER; |  | 
| 8298                         } |  | 
| 8299                     } |  | 
| 8300                     sOrder<<=8; |  | 
| 8301                     tOrder<<=8; |  | 
| 8302                 } |  | 
| 8303             } |  | 
| 8304         } |  | 
| 8305     } |  | 
| 8306 |  | 
| 8307 endOfSecLoopU8: |  | 
| 8308     if(strength >= UCOL_TERTIARY) { |  | 
| 8309         // tertiary loop is the same as secondary (except no French) |  | 
| 8310         elements += coll->latinOneTableLen; |  | 
| 8311         sIndex = 0; tIndex = 0; |  | 
| 8312         endOfSource = FALSE; |  | 
| 8313         for(;;) { |  | 
| 8314             while(sOrder==0) { |  | 
| 8315                 if(sIndex==sLen) { |  | 
| 8316                     endOfSource = TRUE; |  | 
| 8317                     break; |  | 
| 8318                 } |  | 
| 8319                 U_ASSERT(sLen >= 0); |  | 
| 8320                 U8_NEXT_OR_FFFD(source, sIndex, sLen, sChar); |  | 
| 8321                 U_ASSERT(sChar >= 0 && sChar <= 0xFF); |  | 
| 8322                 sOrder = elements[sChar]; |  | 
| 8323                 if(sOrder > UCOL_NOT_FOUND) { |  | 
| 8324                     sOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_TERTIARY
      , sOrder, source, &sIndex, sLen); |  | 
| 8325                 } |  | 
| 8326             } |  | 
| 8327             while(tOrder==0) { |  | 
| 8328                 if(tIndex==tLen) { |  | 
| 8329                     if(endOfSource) { |  | 
| 8330                         return UCOL_EQUAL; // if both strings are at the end, th
      ey are equal |  | 
| 8331                     } else { |  | 
| 8332                         return UCOL_GREATER; |  | 
| 8333                     } |  | 
| 8334                 } |  | 
| 8335                 U_ASSERT(tLen >= 0); |  | 
| 8336                 U8_NEXT_OR_FFFD(target, tIndex, tLen, tChar); |  | 
| 8337                 U_ASSERT(tChar >= 0 && tChar <= 0xFF); |  | 
| 8338                 tOrder = elements[tChar]; |  | 
| 8339                 if(tOrder > UCOL_NOT_FOUND) { |  | 
| 8340                     tOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_TERTIARY
      , tOrder, target, &tIndex, tLen); |  | 
| 8341                 } |  | 
| 8342             } |  | 
| 8343             if(endOfSource) { |  | 
| 8344                 return UCOL_LESS; |  | 
| 8345             } |  | 
| 8346             if(sOrder == tOrder) { |  | 
| 8347                 sOrder = 0; tOrder = 0; |  | 
| 8348                 continue; |  | 
| 8349             } else { |  | 
| 8350                 if(((sOrder^tOrder)&0xff000000)!=0) { |  | 
| 8351                     if(sOrder < tOrder) { |  | 
| 8352                         return UCOL_LESS; |  | 
| 8353                     } else if(sOrder > tOrder) { |  | 
| 8354                         return UCOL_GREATER; |  | 
| 8355                     } |  | 
| 8356                 } |  | 
| 8357                 sOrder<<=8; |  | 
| 8358                 tOrder<<=8; |  | 
| 8359             } |  | 
| 8360         } |  | 
| 8361     } |  | 
| 8362     return UCOL_EQUAL; |  | 
| 8363 } | 406 } | 
| 8364 | 407 | 
| 8365 U_CAPI UCollationResult U_EXPORT2 | 408 U_CAPI UCollationResult U_EXPORT2 | 
| 8366 ucol_strcollIter( const UCollator    *coll, | 409 ucol_strcollIter( const UCollator    *coll, | 
| 8367                  UCharIterator *sIter, | 410                  UCharIterator *sIter, | 
| 8368                  UCharIterator *tIter, | 411                  UCharIterator *tIter, | 
| 8369                  UErrorCode         *status) | 412                  UErrorCode         *status) | 
| 8370 { | 413 { | 
| 8371     if(!status || U_FAILURE(*status)) { | 414     if(!status || U_FAILURE(*status)) { | 
| 8372         return UCOL_EQUAL; | 415         return UCOL_EQUAL; | 
| 8373     } | 416     } | 
| 8374 | 417 | 
| 8375     UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); | 418     UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); | 
| 8376     UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIt
      er); | 419     UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIt
      er); | 
| 8377 | 420 | 
| 8378     if (sIter == tIter) { |  | 
| 8379         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) |  | 
| 8380         return UCOL_EQUAL; |  | 
| 8381     } |  | 
| 8382     if(sIter == NULL || tIter == NULL || coll == NULL) { | 421     if(sIter == NULL || tIter == NULL || coll == NULL) { | 
| 8383         *status = U_ILLEGAL_ARGUMENT_ERROR; | 422         *status = U_ILLEGAL_ARGUMENT_ERROR; | 
| 8384         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) | 423         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); | 
| 8385         return UCOL_EQUAL; | 424         return UCOL_EQUAL; | 
| 8386     } | 425     } | 
| 8387 | 426 | 
| 8388     UCollationResult result = UCOL_EQUAL; | 427     UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tI
      ter, *status); | 
| 8389 | 428 | 
| 8390     // Preparing the context objects for iterating over strings | 429     UTRACE_EXIT_VALUE_STATUS(result, *status); | 
| 8391     collIterate sColl, tColl; |  | 
| 8392     IInit_collIterate(coll, NULL, -1, &sColl, status); |  | 
| 8393     IInit_collIterate(coll, NULL, -1, &tColl, status); |  | 
| 8394     if(U_FAILURE(*status)) { |  | 
| 8395         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) |  | 
| 8396         return UCOL_EQUAL; |  | 
| 8397     } |  | 
| 8398     // The division for the array length may truncate the array size to |  | 
| 8399     // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high |  | 
| 8400     // for all platforms anyway. |  | 
| 8401     UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; |  | 
| 8402     UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)]; |  | 
| 8403     UNormIterator *sNormIter = NULL, *tNormIter = NULL; |  | 
| 8404 |  | 
| 8405     sColl.iterator = sIter; |  | 
| 8406     sColl.flags |= UCOL_USE_ITERATOR; |  | 
| 8407     tColl.flags |= UCOL_USE_ITERATOR; |  | 
| 8408     tColl.iterator = tIter; |  | 
| 8409 |  | 
| 8410     if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) { |  | 
| 8411         sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), statu
      s); |  | 
| 8412         sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status); |  | 
| 8413         sColl.flags &= ~UCOL_ITER_NORM; |  | 
| 8414 |  | 
| 8415         tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), statu
      s); |  | 
| 8416         tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status); |  | 
| 8417         tColl.flags &= ~UCOL_ITER_NORM; |  | 
| 8418     } |  | 
| 8419 |  | 
| 8420     UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL; |  | 
| 8421 |  | 
| 8422     while((sChar = sColl.iterator->next(sColl.iterator)) == |  | 
| 8423         (tChar = tColl.iterator->next(tColl.iterator))) { |  | 
| 8424             if(sChar == U_SENTINEL) { |  | 
| 8425                 result = UCOL_EQUAL; |  | 
| 8426                 goto end_compare; |  | 
| 8427             } |  | 
| 8428     } |  | 
| 8429 |  | 
| 8430     if(sChar == U_SENTINEL) { |  | 
| 8431         tChar = tColl.iterator->previous(tColl.iterator); |  | 
| 8432     } |  | 
| 8433 |  | 
| 8434     if(tChar == U_SENTINEL) { |  | 
| 8435         sChar = sColl.iterator->previous(sColl.iterator); |  | 
| 8436     } |  | 
| 8437 |  | 
| 8438     sChar = sColl.iterator->previous(sColl.iterator); |  | 
| 8439     tChar = tColl.iterator->previous(tColl.iterator); |  | 
| 8440 |  | 
| 8441     if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll)) |  | 
| 8442     { |  | 
| 8443         // We are stopped in the middle of a contraction. |  | 
| 8444         // Scan backwards through the == part of the string looking for the star
      t of the contraction. |  | 
| 8445         //   It doesn't matter which string we scan, since they are the same in 
      this region. |  | 
| 8446         do |  | 
| 8447         { |  | 
| 8448             sChar = sColl.iterator->previous(sColl.iterator); |  | 
| 8449             tChar = tColl.iterator->previous(tColl.iterator); |  | 
| 8450         } |  | 
| 8451         while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll)); |  | 
| 8452     } |  | 
| 8453 |  | 
| 8454 |  | 
| 8455     if(U_SUCCESS(*status)) { |  | 
| 8456         result = ucol_strcollRegular(&sColl, &tColl, status); |  | 
| 8457     } |  | 
| 8458 |  | 
| 8459 end_compare: |  | 
| 8460     if(sNormIter || tNormIter) { |  | 
| 8461         unorm_closeIter(sNormIter); |  | 
| 8462         unorm_closeIter(tNormIter); |  | 
| 8463     } |  | 
| 8464 |  | 
| 8465     UTRACE_EXIT_VALUE_STATUS(result, *status) |  | 
| 8466     return result; | 430     return result; | 
| 8467 } | 431 } | 
| 8468 | 432 | 
| 8469 | 433 | 
| 8470 /*                                                                      */ | 434 /*                                                                      */ | 
| 8471 /* ucol_strcoll     Main public API string comparison function          */ | 435 /* ucol_strcoll     Main public API string comparison function          */ | 
| 8472 /*                                                                      */ | 436 /*                                                                      */ | 
| 8473 U_CAPI UCollationResult U_EXPORT2 | 437 U_CAPI UCollationResult U_EXPORT2 | 
| 8474 ucol_strcoll( const UCollator    *coll, | 438 ucol_strcoll( const UCollator    *coll, | 
| 8475               const UChar        *source, | 439               const UChar        *source, | 
| 8476               int32_t            sourceLength, | 440               int32_t            sourceLength, | 
| 8477               const UChar        *target, | 441               const UChar        *target, | 
| 8478               int32_t            targetLength) | 442               int32_t            targetLength) | 
| 8479 { | 443 { | 
| 8480     U_ALIGN_CODE(16); | 444     U_ALIGN_CODE(16); | 
| 8481 | 445 | 
| 8482     UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); | 446     UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); | 
| 8483     if (UTRACE_LEVEL(UTRACE_VERBOSE)) { | 447     if (UTRACE_LEVEL(UTRACE_VERBOSE)) { | 
| 8484         UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, sour
      ce, target); | 448         UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, sour
      ce, target); | 
| 8485         UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLengt
      h); | 449         UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLengt
      h); | 
| 8486         UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLengt
      h); | 450         UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLengt
      h); | 
| 8487     } | 451     } | 
| 8488 | 452 | 
| 8489     if((source == NULL && sourceLength != 0) || (target == NULL && targetLength 
      != 0)) { |  | 
| 8490         // do not crash, but return. Should have |  | 
| 8491         // status argument to return error. |  | 
| 8492         UTRACE_EXIT_VALUE(UCOL_EQUAL); |  | 
| 8493         return UCOL_EQUAL; |  | 
| 8494     } |  | 
| 8495 |  | 
| 8496     /* Quick check if source and target are same strings. */ |  | 
| 8497     /* They should either both be NULL terminated or the explicit length should 
      be set on both. */ |  | 
| 8498     if (source==target && sourceLength==targetLength) { |  | 
| 8499         UTRACE_EXIT_VALUE(UCOL_EQUAL); |  | 
| 8500         return UCOL_EQUAL; |  | 
| 8501     } |  | 
| 8502 |  | 
| 8503     if(coll->delegate != NULL) { |  | 
| 8504       UErrorCode status = U_ZERO_ERROR; |  | 
| 8505       return ((const Collator*)coll->delegate)->compare(source,sourceLength,targ
      et,targetLength, status); |  | 
| 8506     } |  | 
| 8507 |  | 
| 8508     /* Scan the strings.  Find:                                                 
                  */ |  | 
| 8509     /*    The length of any leading portion that is equal                       
                  */ |  | 
| 8510     /*    Whether they are exactly equal.  (in which case we just return)       
                  */ |  | 
| 8511     const UChar    *pSrc    = source; |  | 
| 8512     const UChar    *pTarg   = target; |  | 
| 8513     int32_t        equalLength; |  | 
| 8514 |  | 
| 8515     if (sourceLength == -1 && targetLength == -1) { |  | 
| 8516         // Both strings are null terminated. |  | 
| 8517         //    Scan through any leading equal portion. |  | 
| 8518         while (*pSrc == *pTarg && *pSrc != 0) { |  | 
| 8519             pSrc++; |  | 
| 8520             pTarg++; |  | 
| 8521         } |  | 
| 8522         if (*pSrc == 0 && *pTarg == 0) { |  | 
| 8523             UTRACE_EXIT_VALUE(UCOL_EQUAL); |  | 
| 8524             return UCOL_EQUAL; |  | 
| 8525         } |  | 
| 8526         equalLength = (int32_t)(pSrc - source); |  | 
| 8527     } |  | 
| 8528     else |  | 
| 8529     { |  | 
| 8530         // One or both strings has an explicit length. |  | 
| 8531         const UChar    *pSrcEnd = source + sourceLength; |  | 
| 8532         const UChar    *pTargEnd = target + targetLength; |  | 
| 8533 |  | 
| 8534         // Scan while the strings are bitwise ==, or until one is exhausted. |  | 
| 8535         for (;;) { |  | 
| 8536             if (pSrc == pSrcEnd || pTarg == pTargEnd) { |  | 
| 8537                 break; |  | 
| 8538             } |  | 
| 8539             if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLeng
      th == -1)) { |  | 
| 8540                 break; |  | 
| 8541             } |  | 
| 8542             if (*pSrc != *pTarg) { |  | 
| 8543                 break; |  | 
| 8544             } |  | 
| 8545             pSrc++; |  | 
| 8546             pTarg++; |  | 
| 8547         } |  | 
| 8548         equalLength = (int32_t)(pSrc - source); |  | 
| 8549 |  | 
| 8550         // If we made it all the way through both strings, we are done.  They ar
      e == |  | 
| 8551         if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of
       src string, however it was specified. */ |  | 
| 8552             (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))     /* and also 
      at end of dest string                  */ |  | 
| 8553         { |  | 
| 8554             UTRACE_EXIT_VALUE(UCOL_EQUAL); |  | 
| 8555             return UCOL_EQUAL; |  | 
| 8556         } |  | 
| 8557     } |  | 
| 8558     if (equalLength > 0) { |  | 
| 8559         /* There is an identical portion at the beginning of the two strings.   
           */ |  | 
| 8560         /*   If the identical portion ends within a contraction or a combining 
           */ |  | 
| 8561         /*   character sequence, back up to the start of that sequence.         
           */ |  | 
| 8562 |  | 
| 8563         // These values should already be set by the code above. |  | 
| 8564         //pSrc  = source + equalLength;        /* point to the first differing c
      hars   */ |  | 
| 8565         //pTarg = target + equalLength; |  | 
| 8566         if ((pSrc  != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) || |  | 
| 8567             (pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll))) |  | 
| 8568         { |  | 
| 8569             // We are stopped in the middle of a contraction. |  | 
| 8570             // Scan backwards through the == part of the string looking for the 
      start of the contraction. |  | 
| 8571             //   It doesn't matter which string we scan, since they are the same
       in this region. |  | 
| 8572             do |  | 
| 8573             { |  | 
| 8574                 equalLength--; |  | 
| 8575                 pSrc--; |  | 
| 8576             } |  | 
| 8577             while (equalLength>0 && ucol_unsafeCP(*pSrc, coll)); |  | 
| 8578         } |  | 
| 8579 |  | 
| 8580         source += equalLength; |  | 
| 8581         target += equalLength; |  | 
| 8582         if (sourceLength > 0) { |  | 
| 8583             sourceLength -= equalLength; |  | 
| 8584         } |  | 
| 8585         if (targetLength > 0) { |  | 
| 8586             targetLength -= equalLength; |  | 
| 8587         } |  | 
| 8588     } |  | 
| 8589 |  | 
| 8590     UErrorCode status = U_ZERO_ERROR; | 453     UErrorCode status = U_ZERO_ERROR; | 
| 8591     UCollationResult returnVal; | 454     UCollationResult returnVal = Collator::fromUCollator(coll)-> | 
| 8592     if(!coll->latinOneUse || (sourceLength > 0 && *source&0xff00) || (targetLeng
      th > 0 && *target&0xff00)) { | 455             compare(source, sourceLength, target, targetLength, status); | 
| 8593         returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targ
      etLength, &status); | 456     UTRACE_EXIT_VALUE_STATUS(returnVal, status); | 
| 8594     } else { |  | 
| 8595         returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, ta
      rgetLength, &status); |  | 
| 8596     } |  | 
| 8597     UTRACE_EXIT_VALUE(returnVal); |  | 
| 8598     return returnVal; | 457     return returnVal; | 
| 8599 } | 458 } | 
| 8600 | 459 | 
| 8601 U_CAPI UCollationResult U_EXPORT2 | 460 U_CAPI UCollationResult U_EXPORT2 | 
| 8602 ucol_strcollUTF8( | 461 ucol_strcollUTF8( | 
| 8603         const UCollator *coll, | 462         const UCollator *coll, | 
| 8604         const char      *source, | 463         const char      *source, | 
| 8605         int32_t         sourceLength, | 464         int32_t         sourceLength, | 
| 8606         const char      *target, | 465         const char      *target, | 
| 8607         int32_t         targetLength, | 466         int32_t         targetLength, | 
| 8608         UErrorCode      *status) | 467         UErrorCode      *status) | 
| 8609 { | 468 { | 
| 8610     U_ALIGN_CODE(16); | 469     U_ALIGN_CODE(16); | 
| 8611 | 470 | 
| 8612     UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8); | 471     UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8); | 
| 8613     if (UTRACE_LEVEL(UTRACE_VERBOSE)) { | 472     if (UTRACE_LEVEL(UTRACE_VERBOSE)) { | 
| 8614         UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, sour
      ce, target); | 473         UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, sour
      ce, target); | 
| 8615         UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLengt
      h); | 474         UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLengt
      h); | 
| 8616         UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLengt
      h); | 475         UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLengt
      h); | 
| 8617     } | 476     } | 
| 8618 | 477 | 
| 8619     if (U_FAILURE(*status)) { | 478     if (U_FAILURE(*status)) { | 
| 8620         /* do nothing */ | 479         /* do nothing */ | 
| 8621         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); | 480         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); | 
| 8622         return UCOL_EQUAL; | 481         return UCOL_EQUAL; | 
| 8623     } | 482     } | 
| 8624 | 483 | 
| 8625     if((source == NULL && sourceLength != 0) || (target == NULL && targetLength 
      != 0)) { | 484     UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareU
      TF8( | 
| 8626         *status = U_ILLEGAL_ARGUMENT_ERROR; | 485             source, sourceLength, target, targetLength, *status); | 
| 8627         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); |  | 
| 8628         return UCOL_EQUAL; |  | 
| 8629     } |  | 
| 8630 |  | 
| 8631     /* Quick check if source and target are same strings. */ |  | 
| 8632     /* They should either both be NULL terminated or the explicit length should 
      be set on both. */ |  | 
| 8633     if (source==target && sourceLength==targetLength) { |  | 
| 8634         UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); |  | 
| 8635         return UCOL_EQUAL; |  | 
| 8636     } |  | 
| 8637 |  | 
| 8638     if(coll->delegate != NULL) { |  | 
| 8639         return ((const Collator*)coll->delegate)->compareUTF8( |  | 
| 8640             StringPiece(source, (sourceLength < 0) ? uprv_strlen(source) : sourc
      eLength), |  | 
| 8641             StringPiece(target, (targetLength < 0) ? uprv_strlen(target) : targe
      tLength), |  | 
| 8642             *status); |  | 
| 8643     } |  | 
| 8644 |  | 
| 8645     /* Scan the strings.  Find:                                                 
                  */ |  | 
| 8646     /*    The length of any leading portion that is equal                       
                  */ |  | 
| 8647     /*    Whether they are exactly equal.  (in which case we just return)       
                  */ |  | 
| 8648     const char  *pSrc = source; |  | 
| 8649     const char  *pTarg = target; |  | 
| 8650     UBool       bSrcLimit = FALSE; |  | 
| 8651     UBool       bTargLimit = FALSE; |  | 
| 8652 |  | 
| 8653     if (sourceLength == -1 && targetLength == -1) { |  | 
| 8654         // Both strings are null terminated. |  | 
| 8655         //    Scan through any leading equal portion. |  | 
| 8656         while (*pSrc == *pTarg && *pSrc != 0) { |  | 
| 8657             pSrc++; |  | 
| 8658             pTarg++; |  | 
| 8659         } |  | 
| 8660         if (*pSrc == 0 && *pTarg == 0) { |  | 
| 8661             UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); |  | 
| 8662             return UCOL_EQUAL; |  | 
| 8663         } |  | 
| 8664         bSrcLimit = (*pSrc == 0); |  | 
| 8665         bTargLimit = (*pTarg == 0); |  | 
| 8666     } |  | 
| 8667     else |  | 
| 8668     { |  | 
| 8669         // One or both strings has an explicit length. |  | 
| 8670         const char *pSrcEnd = source + sourceLength; |  | 
| 8671         const char *pTargEnd = target + targetLength; |  | 
| 8672 |  | 
| 8673         // Scan while the strings are bitwise ==, or until one is exhausted. |  | 
| 8674         for (;;) { |  | 
| 8675             if (pSrc == pSrcEnd || pTarg == pTargEnd) { |  | 
| 8676                 break; |  | 
| 8677             } |  | 
| 8678             if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLeng
      th == -1)) { |  | 
| 8679                 break; |  | 
| 8680             } |  | 
| 8681             if (*pSrc != *pTarg) { |  | 
| 8682                 break; |  | 
| 8683             } |  | 
| 8684             pSrc++; |  | 
| 8685             pTarg++; |  | 
| 8686         } |  | 
| 8687         bSrcLimit = (pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0)); |  | 
| 8688         bTargLimit = (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)); |  | 
| 8689 |  | 
| 8690         // If we made it all the way through both strings, we are done.  They ar
      e == |  | 
| 8691         if (bSrcLimit &&    /* At end of src string, however it was specified. *
      / |  | 
| 8692             bTargLimit)     /* and also at end of dest string                  *
      / |  | 
| 8693         { |  | 
| 8694             UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); |  | 
| 8695             return UCOL_EQUAL; |  | 
| 8696         } |  | 
| 8697     } |  | 
| 8698 |  | 
| 8699     U_ASSERT(!(bSrcLimit && bTargLimit)); |  | 
| 8700 |  | 
| 8701     int32_t    equalLength = pSrc - source; |  | 
| 8702     UBool       bSawNonLatin1 = FALSE; |  | 
| 8703 |  | 
| 8704     if (equalLength > 0) { |  | 
| 8705         // Align position to the start of UTF-8 code point. |  | 
| 8706         if (bTargLimit) { |  | 
| 8707             U8_SET_CP_START((const uint8_t*)source, 0, equalLength); |  | 
| 8708         } else { |  | 
| 8709             U8_SET_CP_START((const uint8_t*)target, 0, equalLength); |  | 
| 8710         } |  | 
| 8711         pSrc = source + equalLength; |  | 
| 8712         pTarg = target + equalLength; |  | 
| 8713     } |  | 
| 8714 |  | 
| 8715     if (equalLength > 0) { |  | 
| 8716         /* There is an identical portion at the beginning of the two strings.   
           */ |  | 
| 8717         /*   If the identical portion ends within a contraction or a combining 
           */ |  | 
| 8718         /*   character sequence, back up to the start of that sequence.         
           */ |  | 
| 8719         UBool bUnsafeCP = FALSE; |  | 
| 8720         UChar32 uc32 = -1; |  | 
| 8721 |  | 
| 8722         if (!bSrcLimit) { |  | 
| 8723             U8_GET_OR_FFFD((const uint8_t*)source, 0, equalLength, sourceLength,
       uc32); |  | 
| 8724             if (uc32 >= 0x10000 || ucol_unsafeCP((UChar)uc32, coll)) { |  | 
| 8725                 bUnsafeCP = TRUE; |  | 
| 8726             } |  | 
| 8727             bSawNonLatin1 |= (uc32 > 0xff); |  | 
| 8728         } |  | 
| 8729         if (!bTargLimit) { |  | 
| 8730             U8_GET_OR_FFFD((const uint8_t*)target, 0, equalLength, targetLength,
       uc32); |  | 
| 8731             if (uc32 >= 0x10000 || ucol_unsafeCP((UChar)uc32, coll)) { |  | 
| 8732                 bUnsafeCP = TRUE; |  | 
| 8733             } |  | 
| 8734             bSawNonLatin1 |= (uc32 > 0xff); |  | 
| 8735         } |  | 
| 8736 |  | 
| 8737         if (bUnsafeCP) { |  | 
| 8738             while (equalLength > 0) { |  | 
| 8739                 // We are stopped in the middle of a contraction. |  | 
| 8740                 // Scan backwards through the == part of the string looking for 
      the start of the contraction. |  | 
| 8741                 //   It doesn't matter which string we scan, since they are the 
      same in this region. |  | 
| 8742                 U8_PREV_OR_FFFD((uint8_t*)source, 0, equalLength, uc32); |  | 
| 8743                 bSawNonLatin1 |= (uc32 > 0xff); |  | 
| 8744                 if (uc32 < 0x10000 && !ucol_unsafeCP((UChar)uc32, coll)) { |  | 
| 8745                     break; |  | 
| 8746                 } |  | 
| 8747             } |  | 
| 8748         } |  | 
| 8749         source += equalLength; |  | 
| 8750         target += equalLength; |  | 
| 8751         if (sourceLength > 0) { |  | 
| 8752             sourceLength -= equalLength; |  | 
| 8753         } |  | 
| 8754         if (targetLength > 0) { |  | 
| 8755             targetLength -= equalLength; |  | 
| 8756         } |  | 
| 8757     } else { |  | 
| 8758         // Lead byte of Latin 1 character is 0x00 - 0xC3 |  | 
| 8759         bSawNonLatin1 = (source && (sourceLength != 0) && (uint8_t)*source > 0xc
      3); |  | 
| 8760         bSawNonLatin1 |= (target && (targetLength != 0) && (uint8_t)*target > 0x
      c3); |  | 
| 8761     } |  | 
| 8762 |  | 
| 8763     UCollationResult returnVal; |  | 
| 8764 |  | 
| 8765     if(!coll->latinOneUse || bSawNonLatin1) { |  | 
| 8766         returnVal = ucol_strcollRegularUTF8(coll, source, sourceLength, target, 
      targetLength, status); |  | 
| 8767     } else { |  | 
| 8768         returnVal = ucol_strcollUseLatin1UTF8(coll, source, sourceLength, target
      , targetLength, status); |  | 
| 8769     } |  | 
| 8770     UTRACE_EXIT_VALUE_STATUS(returnVal, *status); | 486     UTRACE_EXIT_VALUE_STATUS(returnVal, *status); | 
| 8771     return returnVal; | 487     return returnVal; | 
| 8772 } | 488 } | 
| 8773 | 489 | 
| 8774 | 490 | 
| 8775 /* convenience function for comparing strings */ | 491 /* convenience function for comparing strings */ | 
| 8776 U_CAPI UBool U_EXPORT2 | 492 U_CAPI UBool U_EXPORT2 | 
| 8777 ucol_greater(    const    UCollator        *coll, | 493 ucol_greater(    const    UCollator        *coll, | 
| 8778         const    UChar            *source, | 494         const    UChar            *source, | 
| 8779         int32_t            sourceLength, | 495         int32_t            sourceLength, | 
| (...skipping 23 matching lines...) Expand all  Loading... | 
| 8803             int32_t            sourceLength, | 519             int32_t            sourceLength, | 
| 8804             const    UChar            *target, | 520             const    UChar            *target, | 
| 8805             int32_t            targetLength) | 521             int32_t            targetLength) | 
| 8806 { | 522 { | 
| 8807     return (ucol_strcoll(coll, source, sourceLength, target, targetLength) | 523     return (ucol_strcoll(coll, source, sourceLength, target, targetLength) | 
| 8808         == UCOL_EQUAL); | 524         == UCOL_EQUAL); | 
| 8809 } | 525 } | 
| 8810 | 526 | 
| 8811 U_CAPI void U_EXPORT2 | 527 U_CAPI void U_EXPORT2 | 
| 8812 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { | 528 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { | 
| 8813     if(coll && coll->UCA) { | 529     const Collator *c = Collator::fromUCollator(coll); | 
| 8814         uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo)); | 530     if(c != NULL) { | 
|  | 531         UVersionInfo v; | 
|  | 532         c->getVersion(v); | 
|  | 533         // Note: This is tied to how the current implementation encodes the UCA 
      version | 
|  | 534         // in the overall getVersion(). | 
|  | 535         // Alternatively, we could load the root collator and get at lower-level
       data from there. | 
|  | 536         // Either way, it will reflect the input collator's UCA version only | 
|  | 537         // if it is a known implementation. | 
|  | 538         // It would be cleaner to make this a virtual Collator method. | 
|  | 539         info[0] = v[1] >> 3; | 
|  | 540         info[1] = v[1] & 7; | 
|  | 541         info[2] = v[2] >> 6; | 
|  | 542         info[3] = 0; | 
| 8815     } | 543     } | 
| 8816 } | 544 } | 
| 8817 | 545 | 
|  | 546 U_CAPI const UChar * U_EXPORT2 | 
|  | 547 ucol_getRules(const UCollator *coll, int32_t *length) { | 
|  | 548     const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); | 
|  | 549     // OK to crash if coll==NULL: We do not want to check "this" pointers. | 
|  | 550     if(rbc != NULL || coll == NULL) { | 
|  | 551         const UnicodeString &rules = rbc->getRules(); | 
|  | 552         U_ASSERT(rules.getBuffer()[rules.length()] == 0); | 
|  | 553         *length = rules.length(); | 
|  | 554         return rules.getBuffer(); | 
|  | 555     } | 
|  | 556     static const UChar _NUL = 0; | 
|  | 557     *length = 0; | 
|  | 558     return &_NUL; | 
|  | 559 } | 
|  | 560 | 
|  | 561 U_CAPI int32_t U_EXPORT2 | 
|  | 562 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int3
      2_t bufferLen) { | 
|  | 563     UnicodeString rules; | 
|  | 564     const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); | 
|  | 565     if(rbc != NULL || coll == NULL) { | 
|  | 566         rbc->getRules(delta, rules); | 
|  | 567     } | 
|  | 568     if(buffer != NULL && bufferLen > 0) { | 
|  | 569         UErrorCode errorCode = U_ZERO_ERROR; | 
|  | 570         return rules.extract(buffer, bufferLen, errorCode); | 
|  | 571     } else { | 
|  | 572         return rules.length(); | 
|  | 573     } | 
|  | 574 } | 
|  | 575 | 
|  | 576 U_CAPI const char * U_EXPORT2 | 
|  | 577 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *statu
      s) { | 
|  | 578     return ucol_getLocaleByType(coll, type, status); | 
|  | 579 } | 
|  | 580 | 
|  | 581 U_CAPI const char * U_EXPORT2 | 
|  | 582 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode 
      *status) { | 
|  | 583     if(U_FAILURE(*status)) { | 
|  | 584         return NULL; | 
|  | 585     } | 
|  | 586     UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); | 
|  | 587     UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); | 
|  | 588 | 
|  | 589     const char *result; | 
|  | 590     const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); | 
|  | 591     if(rbc == NULL && coll != NULL) { | 
|  | 592         *status = U_UNSUPPORTED_ERROR; | 
|  | 593         result = NULL; | 
|  | 594     } else { | 
|  | 595         result = rbc->internalGetLocaleID(type, *status); | 
|  | 596     } | 
|  | 597 | 
|  | 598     UTRACE_DATA1(UTRACE_INFO, "result = %s", result); | 
|  | 599     UTRACE_EXIT_STATUS(*status); | 
|  | 600     return result; | 
|  | 601 } | 
|  | 602 | 
|  | 603 U_CAPI USet * U_EXPORT2 | 
|  | 604 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) { | 
|  | 605     if(U_FAILURE(*status)) { | 
|  | 606         return NULL; | 
|  | 607     } | 
|  | 608     UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status); | 
|  | 609     if(U_FAILURE(*status)) { | 
|  | 610         delete set; | 
|  | 611         return NULL; | 
|  | 612     } | 
|  | 613     return set->toUSet(); | 
|  | 614 } | 
|  | 615 | 
|  | 616 U_CAPI UBool U_EXPORT2 | 
|  | 617 ucol_equals(const UCollator *source, const UCollator *target) { | 
|  | 618     return source == target || | 
|  | 619         (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target))
      ; | 
|  | 620 } | 
|  | 621 | 
| 8818 #endif /* #if !UCONFIG_NO_COLLATION */ | 622 #endif /* #if !UCONFIG_NO_COLLATION */ | 
| OLD | NEW | 
|---|