| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 ******************************************************************************* | 2 ******************************************************************************* | 
| 3 *   Copyright (C) 2001-2011, International Business Machines | 3 *   Copyright (C) 2001-2014, International Business Machines | 
| 4 *   Corporation and others.  All Rights Reserved. | 4 *   Corporation and others.  All Rights Reserved. | 
| 5 ******************************************************************************* | 5 ******************************************************************************* | 
| 6 *   file name:  bocsu.cpp | 6 *   file name:  bocsu.cpp | 
| 7 *   encoding:   US-ASCII | 7 *   encoding:   US-ASCII | 
| 8 *   tab size:   8 (not used) | 8 *   tab size:   8 (not used) | 
| 9 *   indentation:4 | 9 *   indentation:4 | 
| 10 * | 10 * | 
| 11 *   Author: Markus W. Scherer | 11 *   Author: Markus W. Scherer | 
| 12 * | 12 * | 
| 13 *   Modification history: | 13 *   Modification history: | 
| 14 *   05/18/2001  weiv    Made into separate module | 14 *   05/18/2001  weiv    Made into separate module | 
| 15 */ | 15 */ | 
| 16 | 16 | 
| 17 | 17 | 
| 18 #include "unicode/utypes.h" | 18 #include "unicode/utypes.h" | 
| 19 | 19 | 
| 20 #if !UCONFIG_NO_COLLATION | 20 #if !UCONFIG_NO_COLLATION | 
| 21 | 21 | 
| 22 #include "unicode/bytestream.h" | 22 #include "unicode/bytestream.h" | 
| 23 #include "unicode/utf16.h" | 23 #include "unicode/utf16.h" | 
| 24 #include "bocsu.h" | 24 #include "bocsu.h" | 
| 25 | 25 | 
| 26 /* | 26 /* | 
| 27  * encode one difference value -0x10ffff..+0x10ffff in 1..3 bytes, | 27  * encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes, | 
| 28  * preserving lexical order | 28  * preserving lexical order | 
| 29  */ | 29  */ | 
| 30 U_CFUNC uint8_t * | 30 static uint8_t * | 
| 31 u_writeDiff(int32_t diff, uint8_t *p) { | 31 u_writeDiff(int32_t diff, uint8_t *p) { | 
| 32     if(diff>=SLOPE_REACH_NEG_1) { | 32     if(diff>=SLOPE_REACH_NEG_1) { | 
| 33         if(diff<=SLOPE_REACH_POS_1) { | 33         if(diff<=SLOPE_REACH_POS_1) { | 
| 34             *p++=(uint8_t)(SLOPE_MIDDLE+diff); | 34             *p++=(uint8_t)(SLOPE_MIDDLE+diff); | 
| 35         } else if(diff<=SLOPE_REACH_POS_2) { | 35         } else if(diff<=SLOPE_REACH_POS_2) { | 
| 36             *p++=(uint8_t)(SLOPE_START_POS_2+(diff/SLOPE_TAIL_COUNT)); | 36             *p++=(uint8_t)(SLOPE_START_POS_2+(diff/SLOPE_TAIL_COUNT)); | 
| 37             *p++=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); | 37             *p++=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); | 
| 38         } else if(diff<=SLOPE_REACH_POS_3) { | 38         } else if(diff<=SLOPE_REACH_POS_3) { | 
| 39             p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); | 39             p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); | 
| 40             diff/=SLOPE_TAIL_COUNT; | 40             diff/=SLOPE_TAIL_COUNT; | 
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 88  * | 88  * | 
| 89  * Most small scripts are allocated within aligned 128-blocks of Unicode | 89  * Most small scripts are allocated within aligned 128-blocks of Unicode | 
| 90  * code points. Lexical order is preserved if "prev" is always moved | 90  * code points. Lexical order is preserved if "prev" is always moved | 
| 91  * into the middle of such a block. | 91  * into the middle of such a block. | 
| 92  * | 92  * | 
| 93  * Additionally, "prev" is moved from anywhere in the Unihan | 93  * Additionally, "prev" is moved from anywhere in the Unihan | 
| 94  * area into the middle of that area. | 94  * area into the middle of that area. | 
| 95  * Note that the identical-level run in a sort key is generated from | 95  * Note that the identical-level run in a sort key is generated from | 
| 96  * NFD text - there are never Hangul characters included. | 96  * NFD text - there are never Hangul characters included. | 
| 97  */ | 97  */ | 
| 98 U_CFUNC void | 98 U_CFUNC UChar32 | 
| 99 u_writeIdenticalLevelRun(const UChar *s, int32_t length, icu::ByteSink &sink) { | 99 u_writeIdenticalLevelRun(UChar32 prev, const UChar *s, int32_t length, icu::ByteSink &sink) { | 
| 100     char scratch[64]; | 100     char scratch[64]; | 
| 101     int32_t capacity; | 101     int32_t capacity; | 
| 102 | 102 | 
| 103     UChar32 prev=0; |  | 
| 104     int32_t i=0; | 103     int32_t i=0; | 
| 105     while(i<length) { | 104     while(i<length) { | 
| 106         char *buffer=sink.GetAppendBuffer(1, length*2, scratch, (int32_t)sizeof(scratch), &capacity); | 105         char *buffer=sink.GetAppendBuffer(1, length*2, scratch, (int32_t)sizeof(scratch), &capacity); | 
| 107         uint8_t *p; | 106         uint8_t *p; | 
| 108         // We must have capacity>=SLOPE_MAX_BYTES in case u_writeDiff() writes that much, | 107         // We must have capacity>=SLOPE_MAX_BYTES in case u_writeDiff() writes that much, | 
| 109         // but we do not want to force the sink.GetAppendBuffer() to allocate | 108         // but we do not want to force the sink.GetAppendBuffer() to allocate | 
| 110         // for a large min_capacity because we might actually only write one byte. | 109         // for a large min_capacity because we might actually only write one byte. | 
| 111         if(capacity<16) { | 110         if(capacity<16) { | 
| 112             buffer=scratch; | 111             buffer=scratch; | 
| 113             capacity=(int32_t)sizeof(scratch); | 112             capacity=(int32_t)sizeof(scratch); | 
| 114         } | 113         } | 
| 115         p=reinterpret_cast<uint8_t *>(buffer); | 114         p=reinterpret_cast<uint8_t *>(buffer); | 
| 116         uint8_t *lastSafe=p+capacity-SLOPE_MAX_BYTES; | 115         uint8_t *lastSafe=p+capacity-SLOPE_MAX_BYTES; | 
| 117         while(i<length && p<=lastSafe) { | 116         while(i<length && p<=lastSafe) { | 
| 118             if(prev<0x4e00 || prev>=0xa000) { | 117             if(prev<0x4e00 || prev>=0xa000) { | 
| 119                 prev=(prev&~0x7f)-SLOPE_REACH_NEG_1; | 118                 prev=(prev&~0x7f)-SLOPE_REACH_NEG_1; | 
| 120             } else { | 119             } else { | 
| 121                 /* | 120                 /* | 
| 122                  * Unihan U+4e00..U+9fa5: | 121                  * Unihan U+4e00..U+9fa5: | 
| 123                  * double-bytes down from the upper end | 122                  * double-bytes down from the upper end | 
| 124                  */ | 123                  */ | 
| 125                 prev=0x9fff-SLOPE_REACH_POS_2; | 124                 prev=0x9fff-SLOPE_REACH_POS_2; | 
| 126             } | 125             } | 
| 127 | 126 | 
| 128             UChar32 c; | 127             UChar32 c; | 
| 129             U16_NEXT(s, i, length, c); | 128             U16_NEXT(s, i, length, c); | 
| 130             p=u_writeDiff(c-prev, p); | 129             if(c==0xfffe) { | 
| 131             prev=c; | 130                 *p++=2;  // merge separator | 
|  | 131                 prev=0; | 
|  | 132             } else { | 
|  | 133                 p=u_writeDiff(c-prev, p); | 
|  | 134                 prev=c; | 
|  | 135             } | 
| 132         } | 136         } | 
| 133         sink.Append(buffer, (int32_t)(p-reinterpret_cast<uint8_t *>(buffer))); | 137         sink.Append(buffer, (int32_t)(p-reinterpret_cast<uint8_t *>(buffer))); | 
| 134     } | 138     } | 
| 135 } | 139     return prev; | 
| 136 |  | 
| 137 U_CFUNC int32_t |  | 
| 138 u_writeIdenticalLevelRunTwoChars(UChar32 first, UChar32 second, uint8_t *p) { |  | 
| 139     uint8_t *p0 = p; |  | 
| 140     if(first<0x4e00 || first>=0xa000) { |  | 
| 141         first=(first&~0x7f)-SLOPE_REACH_NEG_1; |  | 
| 142     } else { |  | 
| 143         /* |  | 
| 144          * Unihan U+4e00..U+9fa5: |  | 
| 145          * double-bytes down from the upper end |  | 
| 146          */ |  | 
| 147         first=0x9fff-SLOPE_REACH_POS_2; |  | 
| 148     } |  | 
| 149 |  | 
| 150     p=u_writeDiff(second-first, p); |  | 
| 151     return (int32_t)(p-p0); |  | 
| 152 } | 140 } | 
| 153 | 141 | 
| 154 #endif /* #if !UCONFIG_NO_COLLATION */ | 142 #endif /* #if !UCONFIG_NO_COLLATION */ | 
| OLD | NEW | 
|---|