Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(173)

Side by Side Diff: source/i18n/collationdatawriter.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/collationdatawriter.h ('k') | source/i18n/collationfastlatin.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationdatawriter.cpp
7 *
8 * created on: 2013aug06
9 * created by: Markus W. Scherer
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION
15
16 #include "unicode/tblcoll.h"
17 #include "unicode/udata.h"
18 #include "unicode/uniset.h"
19 #include "cmemory.h"
20 #include "collationdata.h"
21 #include "collationdatabuilder.h"
22 #include "collationdatareader.h"
23 #include "collationdatawriter.h"
24 #include "collationfastlatin.h"
25 #include "collationsettings.h"
26 #include "collationtailoring.h"
27 #include "uassert.h"
28 #include "ucmndata.h"
29
30 U_NAMESPACE_BEGIN
31
32 uint8_t *
33 RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {
34 if(U_FAILURE(errorCode)) { return NULL; }
35 LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));
36 if(buffer.isNull()) {
37 errorCode = U_MEMORY_ALLOCATION_ERROR;
38 return NULL;
39 }
40 length = cloneBinary(buffer.getAlias(), 20000, errorCode);
41 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
42 if(buffer.allocateInsteadAndCopy(length, 0) == NULL) {
43 errorCode = U_MEMORY_ALLOCATION_ERROR;
44 return NULL;
45 }
46 errorCode = U_ZERO_ERROR;
47 length = cloneBinary(buffer.getAlias(), length, errorCode);
48 }
49 if(U_FAILURE(errorCode)) { return NULL; }
50 return buffer.orphan();
51 }
52
53 int32_t
54 RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &erro rCode) const {
55 int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];
56 return CollationDataWriter::writeTailoring(
57 *tailoring, *settings, indexes, dest, capacity,
58 errorCode);
59 }
60
61 static const UDataInfo dataInfo = {
62 sizeof(UDataInfo),
63 0,
64
65 U_IS_BIG_ENDIAN,
66 U_CHARSET_FAMILY,
67 U_SIZEOF_UCHAR,
68 0,
69
70 { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol"
71 { 4, 0, 0, 0 }, // formatVersion
72 { 6, 3, 0, 0 } // dataVersion
73 };
74
75 int32_t
76 CollationDataWriter::writeBase(const CollationData &data, const CollationSetting s &settings,
77 const void *rootElements, int32_t rootElementsLen gth,
78 int32_t indexes[], uint8_t *dest, int32_t capacit y,
79 UErrorCode &errorCode) {
80 return write(TRUE, NULL,
81 data, settings,
82 rootElements, rootElementsLength,
83 indexes, dest, capacity, errorCode);
84 }
85
86 int32_t
87 CollationDataWriter::writeTailoring(const CollationTailoring &t, const Collation Settings &settings,
88 int32_t indexes[], uint8_t *dest, int32_t ca pacity,
89 UErrorCode &errorCode) {
90 return write(FALSE, t.version,
91 *t.data, settings,
92 NULL, 0,
93 indexes, dest, capacity, errorCode);
94 }
95
96 int32_t
97 CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
98 const CollationData &data, const CollationSettings &s ettings,
99 const void *rootElements, int32_t rootElementsLength,
100 int32_t indexes[], uint8_t *dest, int32_t capacity,
101 UErrorCode &errorCode) {
102 if(U_FAILURE(errorCode)) { return 0; }
103 if(capacity < 0 || (capacity > 0 && dest == NULL)) {
104 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
105 return 0;
106 }
107
108 // Figure out which data items to write before settling on
109 // the indexes length and writing offsets.
110 // For any data item, we need to write the start and limit offsets,
111 // so the indexes length must be at least index-of-start-offset + 2.
112 int32_t indexesLength;
113 UBool hasMappings;
114 UnicodeSet unsafeBackwardSet;
115 const CollationData *baseData = data.base;
116
117 int32_t fastLatinVersion;
118 if(data.fastLatinTable != NULL) {
119 fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16;
120 } else {
121 fastLatinVersion = 0;
122 }
123 int32_t fastLatinTableLength = 0;
124
125 if(isBase) {
126 // For the root collator, we write an even number of indexes
127 // so that we start with an 8-aligned offset.
128 indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
129 U_ASSERT(settings.reorderCodesLength == 0);
130 hasMappings = TRUE;
131 unsafeBackwardSet = *data.unsafeBackwardSet;
132 fastLatinTableLength = data.fastLatinTableLength;
133 } else if(baseData == NULL) {
134 hasMappings = FALSE;
135 if(settings.reorderCodesLength == 0) {
136 // only options
137 indexesLength = CollationDataReader::IX_OPTIONS + 1; // no limit of fset here
138 } else {
139 // only options, reorder codes, and the reorder table
140 indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
141 }
142 } else {
143 hasMappings = TRUE;
144 // Tailored mappings, and what else?
145 // Check in ascending order of optional tailoring data items.
146 indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
147 if(data.contextsLength != 0) {
148 indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
149 }
150 unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->u nsafeBackwardSet);
151 if(!unsafeBackwardSet.isEmpty()) {
152 indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
153 }
154 if(data.fastLatinTable != baseData->fastLatinTable) {
155 fastLatinTableLength = data.fastLatinTableLength;
156 indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
157 }
158 }
159
160 int32_t headerSize;
161 if(isBase) {
162 headerSize = 0; // udata_create() writes the header
163 } else {
164 DataHeader header;
165 header.dataHeader.magic1 = 0xda;
166 header.dataHeader.magic2 = 0x27;
167 uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
168 uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));
169 headerSize = (int32_t)sizeof(header);
170 U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes
171 if(hasMappings && data.cesLength != 0) {
172 // Sum of the sizes of the data items which are
173 // not automatically multiples of 8 bytes and which are placed befor e the CEs.
174 int32_t sum = headerSize + (indexesLength + settings.reorderCodesLen gth) * 4;
175 if((sum & 7) != 0) {
176 // We need to add padding somewhere so that the 64-bit CEs are 8 -aligned.
177 // We add to the header size here.
178 // Alternatively, we could increment the indexesLength
179 // or add a few bytes to the reorderTable.
180 headerSize += 4;
181 }
182 }
183 header.dataHeader.headerSize = (uint16_t)headerSize;
184 if(headerSize <= capacity) {
185 uprv_memcpy(dest, &header, sizeof(header));
186 // Write 00 bytes so that the padding is not mistaken for a copyrigh t string.
187 uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(h eader));
188 dest += headerSize;
189 capacity -= headerSize;
190 } else {
191 dest = NULL;
192 capacity = 0;
193 }
194 }
195
196 indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;
197 U_ASSERT((settings.options & ~0xffff) == 0);
198 indexes[CollationDataReader::IX_OPTIONS] =
199 data.numericPrimary | fastLatinVersion | settings.options;
200 indexes[CollationDataReader::IX_RESERVED2] = 0;
201 indexes[CollationDataReader::IX_RESERVED3] = 0;
202
203 // Byte offsets of data items all start from the start of the indexes.
204 // We add the headerSize at the very end.
205 int32_t totalSize = indexesLength * 4;
206
207 if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
208 indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - dat a.ce32s;
209 } else {
210 indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;
211 }
212
213 indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;
214 totalSize += settings.reorderCodesLength * 4;
215
216 indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;
217 if(settings.reorderTable != NULL) {
218 totalSize += 256;
219 }
220
221 indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;
222 if(hasMappings) {
223 UErrorCode errorCode2 = U_ZERO_ERROR;
224 int32_t length;
225 if(totalSize < capacity) {
226 length = utrie2_serialize(data.trie, dest + totalSize,
227 capacity - totalSize, &errorCode2);
228 } else {
229 length = utrie2_serialize(data.trie, NULL, 0, &errorCode2);
230 }
231 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
232 errorCode = errorCode2;
233 return 0;
234 }
235 // The trie size should be a multiple of 8 bytes due to the way
236 // compactIndex2(UNewTrie2 *trie) currently works.
237 U_ASSERT((length & 7) == 0);
238 totalSize += length;
239 }
240
241 indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;
242 indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;
243 if(hasMappings && data.cesLength != 0) {
244 U_ASSERT(((headerSize + totalSize) & 7) == 0);
245 totalSize += data.cesLength * 8;
246 }
247
248 indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;
249 indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;
250 if(hasMappings) {
251 totalSize += data.ce32sLength * 4;
252 }
253
254 indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;
255 totalSize += rootElementsLength * 4;
256
257 indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;
258 if(hasMappings) {
259 totalSize += data.contextsLength * 2;
260 }
261
262 indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;
263 if(hasMappings && !unsafeBackwardSet.isEmpty()) {
264 UErrorCode errorCode2 = U_ZERO_ERROR;
265 int32_t length;
266 if(totalSize < capacity) {
267 uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);
268 length = unsafeBackwardSet.serialize(
269 p, (capacity - totalSize) / 2, errorCode2);
270 } else {
271 length = unsafeBackwardSet.serialize(NULL, 0, errorCode2);
272 }
273 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
274 errorCode = errorCode2;
275 return 0;
276 }
277 totalSize += length * 2;
278 }
279
280 indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;
281 totalSize += fastLatinTableLength * 2;
282
283 indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;
284 if(isBase) {
285 totalSize += data.scriptsLength * 2;
286 }
287
288 indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;
289 if(isBase) {
290 totalSize += 256;
291 }
292
293 indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;
294 indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;
295
296 if(totalSize > capacity) {
297 errorCode = U_BUFFER_OVERFLOW_ERROR;
298 return headerSize + totalSize;
299 }
300
301 uprv_memcpy(dest, indexes, indexesLength * 4);
302 copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, settings.reo rderCodes, dest);
303 copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reo rderTable, dest);
304 // The trie has already been serialized into the dest buffer.
305 copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);
306 copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);
307 copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements , dest);
308 copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, de st);
309 // The unsafeBackwardSet has already been serialized into the dest buffer.
310 copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fast LatinTable, dest);
311 copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, data.scripts, dest );
312 copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.co mpressibleBytes, dest);
313
314 return headerSize + totalSize;
315 }
316
317 void
318 CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,
319 const void *src, uint8_t *dest) {
320 int32_t start = indexes[startIndex];
321 int32_t limit = indexes[startIndex + 1];
322 if(start < limit) {
323 uprv_memcpy(dest + start, src, limit - start);
324 }
325 }
326
327 U_NAMESPACE_END
328
329 #endif // !UCONFIG_NO_COLLATION
OLDNEW
« no previous file with comments | « source/i18n/collationdatawriter.h ('k') | source/i18n/collationfastlatin.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698