OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2001-2011, International Business Machines | 3 * Copyright (C) 2001-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * file name: bocsu.cpp | 6 * file name: bocsu.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
11 * Author: Markus W. Scherer | 11 * Author: Markus W. Scherer |
12 * | 12 * |
13 * Modification history: | 13 * Modification history: |
14 * 05/18/2001 weiv Made into separate module | 14 * 05/18/2001 weiv Made into separate module |
15 */ | 15 */ |
16 | 16 |
17 | 17 |
18 #include "unicode/utypes.h" | 18 #include "unicode/utypes.h" |
19 | 19 |
20 #if !UCONFIG_NO_COLLATION | 20 #if !UCONFIG_NO_COLLATION |
21 | 21 |
22 #include "unicode/bytestream.h" | 22 #include "unicode/bytestream.h" |
23 #include "unicode/utf16.h" | 23 #include "unicode/utf16.h" |
24 #include "bocsu.h" | 24 #include "bocsu.h" |
25 | 25 |
26 /* | 26 /* |
27 * encode one difference value -0x10ffff..+0x10ffff in 1..3 bytes, | 27 * encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes, |
28 * preserving lexical order | 28 * preserving lexical order |
29 */ | 29 */ |
30 U_CFUNC uint8_t * | 30 static uint8_t * |
31 u_writeDiff(int32_t diff, uint8_t *p) { | 31 u_writeDiff(int32_t diff, uint8_t *p) { |
32 if(diff>=SLOPE_REACH_NEG_1) { | 32 if(diff>=SLOPE_REACH_NEG_1) { |
33 if(diff<=SLOPE_REACH_POS_1) { | 33 if(diff<=SLOPE_REACH_POS_1) { |
34 *p++=(uint8_t)(SLOPE_MIDDLE+diff); | 34 *p++=(uint8_t)(SLOPE_MIDDLE+diff); |
35 } else if(diff<=SLOPE_REACH_POS_2) { | 35 } else if(diff<=SLOPE_REACH_POS_2) { |
36 *p++=(uint8_t)(SLOPE_START_POS_2+(diff/SLOPE_TAIL_COUNT)); | 36 *p++=(uint8_t)(SLOPE_START_POS_2+(diff/SLOPE_TAIL_COUNT)); |
37 *p++=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); | 37 *p++=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
38 } else if(diff<=SLOPE_REACH_POS_3) { | 38 } else if(diff<=SLOPE_REACH_POS_3) { |
39 p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); | 39 p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT); |
40 diff/=SLOPE_TAIL_COUNT; | 40 diff/=SLOPE_TAIL_COUNT; |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 * | 88 * |
89 * Most small scripts are allocated within aligned 128-blocks of Unicode | 89 * Most small scripts are allocated within aligned 128-blocks of Unicode |
90 * code points. Lexical order is preserved if "prev" is always moved | 90 * code points. Lexical order is preserved if "prev" is always moved |
91 * into the middle of such a block. | 91 * into the middle of such a block. |
92 * | 92 * |
93 * Additionally, "prev" is moved from anywhere in the Unihan | 93 * Additionally, "prev" is moved from anywhere in the Unihan |
94 * area into the middle of that area. | 94 * area into the middle of that area. |
95 * Note that the identical-level run in a sort key is generated from | 95 * Note that the identical-level run in a sort key is generated from |
96 * NFD text - there are never Hangul characters included. | 96 * NFD text - there are never Hangul characters included. |
97 */ | 97 */ |
98 U_CFUNC void | 98 U_CFUNC UChar32 |
99 u_writeIdenticalLevelRun(const UChar *s, int32_t length, icu::ByteSink &sink) { | 99 u_writeIdenticalLevelRun(UChar32 prev, const UChar *s, int32_t length, icu::Byte
Sink &sink) { |
100 char scratch[64]; | 100 char scratch[64]; |
101 int32_t capacity; | 101 int32_t capacity; |
102 | 102 |
103 UChar32 prev=0; | |
104 int32_t i=0; | 103 int32_t i=0; |
105 while(i<length) { | 104 while(i<length) { |
106 char *buffer=sink.GetAppendBuffer(1, length*2, scratch, (int32_t)sizeof(
scratch), &capacity); | 105 char *buffer=sink.GetAppendBuffer(1, length*2, scratch, (int32_t)sizeof(
scratch), &capacity); |
107 uint8_t *p; | 106 uint8_t *p; |
108 // We must have capacity>=SLOPE_MAX_BYTES in case u_writeDiff() writes that much, | 107 // We must have capacity>=SLOPE_MAX_BYTES in case u_writeDiff() writes that much, |
109 // but we do not want to force the sink.GetAppendBuffer() to allocate | 108 // but we do not want to force the sink.GetAppendBuffer() to allocate |
110 // for a large min_capacity because we might actually only write one byte. | 109 // for a large min_capacity because we might actually only write one byte. |
111 if(capacity<16) { | 110 if(capacity<16) { |
112 buffer=scratch; | 111 buffer=scratch; |
113 capacity=(int32_t)sizeof(scratch); | 112 capacity=(int32_t)sizeof(scratch); |
114 } | 113 } |
115 p=reinterpret_cast<uint8_t *>(buffer); | 114 p=reinterpret_cast<uint8_t *>(buffer); |
116 uint8_t *lastSafe=p+capacity-SLOPE_MAX_BYTES; | 115 uint8_t *lastSafe=p+capacity-SLOPE_MAX_BYTES; |
117 while(i<length && p<=lastSafe) { | 116 while(i<length && p<=lastSafe) { |
118 if(prev<0x4e00 || prev>=0xa000) { | 117 if(prev<0x4e00 || prev>=0xa000) { |
119 prev=(prev&~0x7f)-SLOPE_REACH_NEG_1; | 118 prev=(prev&~0x7f)-SLOPE_REACH_NEG_1; |
120 } else { | 119 } else { |
121 /* | 120 /* |
122 * Unihan U+4e00..U+9fa5: | 121 * Unihan U+4e00..U+9fa5: |
123 * double-bytes down from the upper end | 122 * double-bytes down from the upper end |
124 */ | 123 */ |
125 prev=0x9fff-SLOPE_REACH_POS_2; | 124 prev=0x9fff-SLOPE_REACH_POS_2; |
126 } | 125 } |
127 | 126 |
128 UChar32 c; | 127 UChar32 c; |
129 U16_NEXT(s, i, length, c); | 128 U16_NEXT(s, i, length, c); |
130 p=u_writeDiff(c-prev, p); | 129 if(c==0xfffe) { |
131 prev=c; | 130 *p++=2; // merge separator |
| 131 prev=0; |
| 132 } else { |
| 133 p=u_writeDiff(c-prev, p); |
| 134 prev=c; |
| 135 } |
132 } | 136 } |
133 sink.Append(buffer, (int32_t)(p-reinterpret_cast<uint8_t *>(buffer))); | 137 sink.Append(buffer, (int32_t)(p-reinterpret_cast<uint8_t *>(buffer))); |
134 } | 138 } |
135 } | 139 return prev; |
136 | |
137 U_CFUNC int32_t | |
138 u_writeIdenticalLevelRunTwoChars(UChar32 first, UChar32 second, uint8_t *p) { | |
139 uint8_t *p0 = p; | |
140 if(first<0x4e00 || first>=0xa000) { | |
141 first=(first&~0x7f)-SLOPE_REACH_NEG_1; | |
142 } else { | |
143 /* | |
144 * Unihan U+4e00..U+9fa5: | |
145 * double-bytes down from the upper end | |
146 */ | |
147 first=0x9fff-SLOPE_REACH_POS_2; | |
148 } | |
149 | |
150 p=u_writeDiff(second-first, p); | |
151 return (int32_t)(p-p0); | |
152 } | 140 } |
153 | 141 |
154 #endif /* #if !UCONFIG_NO_COLLATION */ | 142 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |