OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2013-2014, International Business Machines | 3 * Copyright (C) 2013-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * collationdatawriter.cpp | 6 * collationdatawriter.cpp |
7 * | 7 * |
8 * created on: 2013aug06 | 8 * created on: 2013aug06 |
9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
10 */ | 10 */ |
11 | 11 |
12 #include "unicode/utypes.h" | 12 #include "unicode/utypes.h" |
13 | 13 |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
61 static const UDataInfo dataInfo = { | 61 static const UDataInfo dataInfo = { |
62 sizeof(UDataInfo), | 62 sizeof(UDataInfo), |
63 0, | 63 0, |
64 | 64 |
65 U_IS_BIG_ENDIAN, | 65 U_IS_BIG_ENDIAN, |
66 U_CHARSET_FAMILY, | 66 U_CHARSET_FAMILY, |
67 U_SIZEOF_UCHAR, | 67 U_SIZEOF_UCHAR, |
68 0, | 68 0, |
69 | 69 |
70 { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol" | 70 { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol" |
71 { 4, 0, 0, 0 }, // formatVersion | 71 { 5, 0, 0, 0 }, // formatVersion |
72 { 6, 3, 0, 0 } // dataVersion | 72 { 6, 3, 0, 0 } // dataVersion |
73 }; | 73 }; |
74 | 74 |
75 int32_t | 75 int32_t |
76 CollationDataWriter::writeBase(const CollationData &data, const CollationSetting
s &settings, | 76 CollationDataWriter::writeBase(const CollationData &data, const CollationSetting
s &settings, |
77 const void *rootElements, int32_t rootElementsLen
gth, | 77 const void *rootElements, int32_t rootElementsLen
gth, |
78 int32_t indexes[], uint8_t *dest, int32_t capacit
y, | 78 int32_t indexes[], uint8_t *dest, int32_t capacit
y, |
79 UErrorCode &errorCode) { | 79 UErrorCode &errorCode) { |
80 return write(TRUE, NULL, | 80 return write(TRUE, NULL, |
81 data, settings, | 81 data, settings, |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
150 unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->u
nsafeBackwardSet); | 150 unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->u
nsafeBackwardSet); |
151 if(!unsafeBackwardSet.isEmpty()) { | 151 if(!unsafeBackwardSet.isEmpty()) { |
152 indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2; | 152 indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2; |
153 } | 153 } |
154 if(data.fastLatinTable != baseData->fastLatinTable) { | 154 if(data.fastLatinTable != baseData->fastLatinTable) { |
155 fastLatinTableLength = data.fastLatinTableLength; | 155 fastLatinTableLength = data.fastLatinTableLength; |
156 indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2; | 156 indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2; |
157 } | 157 } |
158 } | 158 } |
159 | 159 |
| 160 UVector32 codesAndRanges(errorCode); |
| 161 const int32_t *reorderCodes = settings.reorderCodes; |
| 162 int32_t reorderCodesLength = settings.reorderCodesLength; |
| 163 if(settings.hasReordering() && |
| 164 CollationSettings::reorderTableHasSplitBytes(settings.reorderTable))
{ |
| 165 // Rebuild the full list of reorder ranges. |
| 166 // The list in the settings is truncated for efficiency. |
| 167 data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges,
errorCode); |
| 168 // Write the codes, then the ranges. |
| 169 for(int32_t i = 0; i < reorderCodesLength; ++i) { |
| 170 codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode); |
| 171 } |
| 172 if(U_FAILURE(errorCode)) { return 0; } |
| 173 reorderCodes = codesAndRanges.getBuffer(); |
| 174 reorderCodesLength = codesAndRanges.size(); |
| 175 } |
| 176 |
160 int32_t headerSize; | 177 int32_t headerSize; |
161 if(isBase) { | 178 if(isBase) { |
162 headerSize = 0; // udata_create() writes the header | 179 headerSize = 0; // udata_create() writes the header |
163 } else { | 180 } else { |
164 DataHeader header; | 181 DataHeader header; |
165 header.dataHeader.magic1 = 0xda; | 182 header.dataHeader.magic1 = 0xda; |
166 header.dataHeader.magic2 = 0x27; | 183 header.dataHeader.magic2 = 0x27; |
167 uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo)); | 184 uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo)); |
168 uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo)); | 185 uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo)); |
169 headerSize = (int32_t)sizeof(header); | 186 headerSize = (int32_t)sizeof(header); |
170 U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes | 187 U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes |
171 if(hasMappings && data.cesLength != 0) { | 188 if(hasMappings && data.cesLength != 0) { |
172 // Sum of the sizes of the data items which are | 189 // Sum of the sizes of the data items which are |
173 // not automatically multiples of 8 bytes and which are placed befor
e the CEs. | 190 // not automatically multiples of 8 bytes and which are placed befor
e the CEs. |
174 int32_t sum = headerSize + (indexesLength + settings.reorderCodesLen
gth) * 4; | 191 int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4; |
175 if((sum & 7) != 0) { | 192 if((sum & 7) != 0) { |
176 // We need to add padding somewhere so that the 64-bit CEs are 8
-aligned. | 193 // We need to add padding somewhere so that the 64-bit CEs are 8
-aligned. |
177 // We add to the header size here. | 194 // We add to the header size here. |
178 // Alternatively, we could increment the indexesLength | 195 // Alternatively, we could increment the indexesLength |
179 // or add a few bytes to the reorderTable. | 196 // or add a few bytes to the reorderTable. |
180 headerSize += 4; | 197 headerSize += 4; |
181 } | 198 } |
182 } | 199 } |
183 header.dataHeader.headerSize = (uint16_t)headerSize; | 200 header.dataHeader.headerSize = (uint16_t)headerSize; |
184 if(headerSize <= capacity) { | 201 if(headerSize <= capacity) { |
(...skipping 19 matching lines...) Expand all Loading... |
204 // We add the headerSize at the very end. | 221 // We add the headerSize at the very end. |
205 int32_t totalSize = indexesLength * 4; | 222 int32_t totalSize = indexesLength * 4; |
206 | 223 |
207 if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) { | 224 if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) { |
208 indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - dat
a.ce32s; | 225 indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - dat
a.ce32s; |
209 } else { | 226 } else { |
210 indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1; | 227 indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1; |
211 } | 228 } |
212 | 229 |
213 indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize; | 230 indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize; |
214 totalSize += settings.reorderCodesLength * 4; | 231 totalSize += reorderCodesLength * 4; |
215 | 232 |
216 indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize; | 233 indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize; |
217 if(settings.reorderTable != NULL) { | 234 if(settings.reorderTable != NULL) { |
218 totalSize += 256; | 235 totalSize += 256; |
219 } | 236 } |
220 | 237 |
221 indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize; | 238 indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize; |
222 if(hasMappings) { | 239 if(hasMappings) { |
223 UErrorCode errorCode2 = U_ZERO_ERROR; | 240 UErrorCode errorCode2 = U_ZERO_ERROR; |
224 int32_t length; | 241 int32_t length; |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
273 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { | 290 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { |
274 errorCode = errorCode2; | 291 errorCode = errorCode2; |
275 return 0; | 292 return 0; |
276 } | 293 } |
277 totalSize += length * 2; | 294 totalSize += length * 2; |
278 } | 295 } |
279 | 296 |
280 indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize; | 297 indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize; |
281 totalSize += fastLatinTableLength * 2; | 298 totalSize += fastLatinTableLength * 2; |
282 | 299 |
| 300 UnicodeString scripts; |
283 indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize; | 301 indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize; |
284 if(isBase) { | 302 if(isBase) { |
285 totalSize += data.scriptsLength * 2; | 303 scripts.append((UChar)data.numScripts); |
| 304 scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.
numScripts + 16); |
| 305 scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.
scriptStartsLength); |
| 306 totalSize += scripts.length() * 2; |
286 } | 307 } |
287 | 308 |
288 indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize; | 309 indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize; |
289 if(isBase) { | 310 if(isBase) { |
290 totalSize += 256; | 311 totalSize += 256; |
291 } | 312 } |
292 | 313 |
293 indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize; | 314 indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize; |
294 indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize; | 315 indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize; |
295 | 316 |
296 if(totalSize > capacity) { | 317 if(totalSize > capacity) { |
297 errorCode = U_BUFFER_OVERFLOW_ERROR; | 318 errorCode = U_BUFFER_OVERFLOW_ERROR; |
298 return headerSize + totalSize; | 319 return headerSize + totalSize; |
299 } | 320 } |
300 | 321 |
301 uprv_memcpy(dest, indexes, indexesLength * 4); | 322 uprv_memcpy(dest, indexes, indexesLength * 4); |
302 copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, settings.reo
rderCodes, dest); | 323 copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes
, dest); |
303 copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reo
rderTable, dest); | 324 copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reo
rderTable, dest); |
304 // The trie has already been serialized into the dest buffer. | 325 // The trie has already been serialized into the dest buffer. |
305 copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest); | 326 copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest); |
306 copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest); | 327 copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest); |
307 copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements
, dest); | 328 copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements
, dest); |
308 copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, de
st); | 329 copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, de
st); |
309 // The unsafeBackwardSet has already been serialized into the dest buffer. | 330 // The unsafeBackwardSet has already been serialized into the dest buffer. |
310 copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fast
LatinTable, dest); | 331 copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fast
LatinTable, dest); |
311 copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, data.scripts, dest
); | 332 copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(
), dest); |
312 copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.co
mpressibleBytes, dest); | 333 copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.co
mpressibleBytes, dest); |
313 | 334 |
314 return headerSize + totalSize; | 335 return headerSize + totalSize; |
315 } | 336 } |
316 | 337 |
317 void | 338 void |
318 CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex, | 339 CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex, |
319 const void *src, uint8_t *dest) { | 340 const void *src, uint8_t *dest) { |
320 int32_t start = indexes[startIndex]; | 341 int32_t start = indexes[startIndex]; |
321 int32_t limit = indexes[startIndex + 1]; | 342 int32_t limit = indexes[startIndex + 1]; |
322 if(start < limit) { | 343 if(start < limit) { |
323 uprv_memcpy(dest + start, src, limit - start); | 344 uprv_memcpy(dest + start, src, limit - start); |
324 } | 345 } |
325 } | 346 } |
326 | 347 |
327 U_NAMESPACE_END | 348 U_NAMESPACE_END |
328 | 349 |
329 #endif // !UCONFIG_NO_COLLATION | 350 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |