| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2013-2014, International Business Machines | 3 * Copyright (C) 2013-2015, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 * collationdatawriter.cpp | 6 * collationdatawriter.cpp |
| 7 * | 7 * |
| 8 * created on: 2013aug06 | 8 * created on: 2013aug06 |
| 9 * created by: Markus W. Scherer | 9 * created by: Markus W. Scherer |
| 10 */ | 10 */ |
| 11 | 11 |
| 12 #include "unicode/utypes.h" | 12 #include "unicode/utypes.h" |
| 13 | 13 |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 61 static const UDataInfo dataInfo = { | 61 static const UDataInfo dataInfo = { |
| 62 sizeof(UDataInfo), | 62 sizeof(UDataInfo), |
| 63 0, | 63 0, |
| 64 | 64 |
| 65 U_IS_BIG_ENDIAN, | 65 U_IS_BIG_ENDIAN, |
| 66 U_CHARSET_FAMILY, | 66 U_CHARSET_FAMILY, |
| 67 U_SIZEOF_UCHAR, | 67 U_SIZEOF_UCHAR, |
| 68 0, | 68 0, |
| 69 | 69 |
| 70 { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol" | 70 { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol" |
| 71 { 4, 0, 0, 0 }, // formatVersion | 71 { 5, 0, 0, 0 }, // formatVersion |
| 72 { 6, 3, 0, 0 } // dataVersion | 72 { 6, 3, 0, 0 } // dataVersion |
| 73 }; | 73 }; |
| 74 | 74 |
| 75 int32_t | 75 int32_t |
| 76 CollationDataWriter::writeBase(const CollationData &data, const CollationSetting
s &settings, | 76 CollationDataWriter::writeBase(const CollationData &data, const CollationSetting
s &settings, |
| 77 const void *rootElements, int32_t rootElementsLen
gth, | 77 const void *rootElements, int32_t rootElementsLen
gth, |
| 78 int32_t indexes[], uint8_t *dest, int32_t capacit
y, | 78 int32_t indexes[], uint8_t *dest, int32_t capacit
y, |
| 79 UErrorCode &errorCode) { | 79 UErrorCode &errorCode) { |
| 80 return write(TRUE, NULL, | 80 return write(TRUE, NULL, |
| 81 data, settings, | 81 data, settings, |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 150 unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->u
nsafeBackwardSet); | 150 unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->u
nsafeBackwardSet); |
| 151 if(!unsafeBackwardSet.isEmpty()) { | 151 if(!unsafeBackwardSet.isEmpty()) { |
| 152 indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2; | 152 indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2; |
| 153 } | 153 } |
| 154 if(data.fastLatinTable != baseData->fastLatinTable) { | 154 if(data.fastLatinTable != baseData->fastLatinTable) { |
| 155 fastLatinTableLength = data.fastLatinTableLength; | 155 fastLatinTableLength = data.fastLatinTableLength; |
| 156 indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2; | 156 indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2; |
| 157 } | 157 } |
| 158 } | 158 } |
| 159 | 159 |
| 160 UVector32 codesAndRanges(errorCode); |
| 161 const int32_t *reorderCodes = settings.reorderCodes; |
| 162 int32_t reorderCodesLength = settings.reorderCodesLength; |
| 163 if(settings.hasReordering() && |
| 164 CollationSettings::reorderTableHasSplitBytes(settings.reorderTable))
{ |
| 165 // Rebuild the full list of reorder ranges. |
| 166 // The list in the settings is truncated for efficiency. |
| 167 data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges,
errorCode); |
| 168 // Write the codes, then the ranges. |
| 169 for(int32_t i = 0; i < reorderCodesLength; ++i) { |
| 170 codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode); |
| 171 } |
| 172 if(U_FAILURE(errorCode)) { return 0; } |
| 173 reorderCodes = codesAndRanges.getBuffer(); |
| 174 reorderCodesLength = codesAndRanges.size(); |
| 175 } |
| 176 |
| 160 int32_t headerSize; | 177 int32_t headerSize; |
| 161 if(isBase) { | 178 if(isBase) { |
| 162 headerSize = 0; // udata_create() writes the header | 179 headerSize = 0; // udata_create() writes the header |
| 163 } else { | 180 } else { |
| 164 DataHeader header; | 181 DataHeader header; |
| 165 header.dataHeader.magic1 = 0xda; | 182 header.dataHeader.magic1 = 0xda; |
| 166 header.dataHeader.magic2 = 0x27; | 183 header.dataHeader.magic2 = 0x27; |
| 167 uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo)); | 184 uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo)); |
| 168 uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo)); | 185 uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo)); |
| 169 headerSize = (int32_t)sizeof(header); | 186 headerSize = (int32_t)sizeof(header); |
| 170 U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes | 187 U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes |
| 171 if(hasMappings && data.cesLength != 0) { | 188 if(hasMappings && data.cesLength != 0) { |
| 172 // Sum of the sizes of the data items which are | 189 // Sum of the sizes of the data items which are |
| 173 // not automatically multiples of 8 bytes and which are placed befor
e the CEs. | 190 // not automatically multiples of 8 bytes and which are placed befor
e the CEs. |
| 174 int32_t sum = headerSize + (indexesLength + settings.reorderCodesLen
gth) * 4; | 191 int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4; |
| 175 if((sum & 7) != 0) { | 192 if((sum & 7) != 0) { |
| 176 // We need to add padding somewhere so that the 64-bit CEs are 8
-aligned. | 193 // We need to add padding somewhere so that the 64-bit CEs are 8
-aligned. |
| 177 // We add to the header size here. | 194 // We add to the header size here. |
| 178 // Alternatively, we could increment the indexesLength | 195 // Alternatively, we could increment the indexesLength |
| 179 // or add a few bytes to the reorderTable. | 196 // or add a few bytes to the reorderTable. |
| 180 headerSize += 4; | 197 headerSize += 4; |
| 181 } | 198 } |
| 182 } | 199 } |
| 183 header.dataHeader.headerSize = (uint16_t)headerSize; | 200 header.dataHeader.headerSize = (uint16_t)headerSize; |
| 184 if(headerSize <= capacity) { | 201 if(headerSize <= capacity) { |
| (...skipping 19 matching lines...) Expand all Loading... |
| 204 // We add the headerSize at the very end. | 221 // We add the headerSize at the very end. |
| 205 int32_t totalSize = indexesLength * 4; | 222 int32_t totalSize = indexesLength * 4; |
| 206 | 223 |
| 207 if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) { | 224 if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) { |
| 208 indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - dat
a.ce32s; | 225 indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - dat
a.ce32s; |
| 209 } else { | 226 } else { |
| 210 indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1; | 227 indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1; |
| 211 } | 228 } |
| 212 | 229 |
| 213 indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize; | 230 indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize; |
| 214 totalSize += settings.reorderCodesLength * 4; | 231 totalSize += reorderCodesLength * 4; |
| 215 | 232 |
| 216 indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize; | 233 indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize; |
| 217 if(settings.reorderTable != NULL) { | 234 if(settings.reorderTable != NULL) { |
| 218 totalSize += 256; | 235 totalSize += 256; |
| 219 } | 236 } |
| 220 | 237 |
| 221 indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize; | 238 indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize; |
| 222 if(hasMappings) { | 239 if(hasMappings) { |
| 223 UErrorCode errorCode2 = U_ZERO_ERROR; | 240 UErrorCode errorCode2 = U_ZERO_ERROR; |
| 224 int32_t length; | 241 int32_t length; |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 273 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { | 290 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { |
| 274 errorCode = errorCode2; | 291 errorCode = errorCode2; |
| 275 return 0; | 292 return 0; |
| 276 } | 293 } |
| 277 totalSize += length * 2; | 294 totalSize += length * 2; |
| 278 } | 295 } |
| 279 | 296 |
| 280 indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize; | 297 indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize; |
| 281 totalSize += fastLatinTableLength * 2; | 298 totalSize += fastLatinTableLength * 2; |
| 282 | 299 |
| 300 UnicodeString scripts; |
| 283 indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize; | 301 indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize; |
| 284 if(isBase) { | 302 if(isBase) { |
| 285 totalSize += data.scriptsLength * 2; | 303 scripts.append((UChar)data.numScripts); |
| 304 scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.
numScripts + 16); |
| 305 scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.
scriptStartsLength); |
| 306 totalSize += scripts.length() * 2; |
| 286 } | 307 } |
| 287 | 308 |
| 288 indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize; | 309 indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize; |
| 289 if(isBase) { | 310 if(isBase) { |
| 290 totalSize += 256; | 311 totalSize += 256; |
| 291 } | 312 } |
| 292 | 313 |
| 293 indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize; | 314 indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize; |
| 294 indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize; | 315 indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize; |
| 295 | 316 |
| 296 if(totalSize > capacity) { | 317 if(totalSize > capacity) { |
| 297 errorCode = U_BUFFER_OVERFLOW_ERROR; | 318 errorCode = U_BUFFER_OVERFLOW_ERROR; |
| 298 return headerSize + totalSize; | 319 return headerSize + totalSize; |
| 299 } | 320 } |
| 300 | 321 |
| 301 uprv_memcpy(dest, indexes, indexesLength * 4); | 322 uprv_memcpy(dest, indexes, indexesLength * 4); |
| 302 copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, settings.reo
rderCodes, dest); | 323 copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes
, dest); |
| 303 copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reo
rderTable, dest); | 324 copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reo
rderTable, dest); |
| 304 // The trie has already been serialized into the dest buffer. | 325 // The trie has already been serialized into the dest buffer. |
| 305 copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest); | 326 copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest); |
| 306 copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest); | 327 copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest); |
| 307 copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements
, dest); | 328 copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements
, dest); |
| 308 copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, de
st); | 329 copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, de
st); |
| 309 // The unsafeBackwardSet has already been serialized into the dest buffer. | 330 // The unsafeBackwardSet has already been serialized into the dest buffer. |
| 310 copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fast
LatinTable, dest); | 331 copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fast
LatinTable, dest); |
| 311 copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, data.scripts, dest
); | 332 copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(
), dest); |
| 312 copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.co
mpressibleBytes, dest); | 333 copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.co
mpressibleBytes, dest); |
| 313 | 334 |
| 314 return headerSize + totalSize; | 335 return headerSize + totalSize; |
| 315 } | 336 } |
| 316 | 337 |
| 317 void | 338 void |
| 318 CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex, | 339 CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex, |
| 319 const void *src, uint8_t *dest) { | 340 const void *src, uint8_t *dest) { |
| 320 int32_t start = indexes[startIndex]; | 341 int32_t start = indexes[startIndex]; |
| 321 int32_t limit = indexes[startIndex + 1]; | 342 int32_t limit = indexes[startIndex + 1]; |
| 322 if(start < limit) { | 343 if(start < limit) { |
| 323 uprv_memcpy(dest + start, src, limit - start); | 344 uprv_memcpy(dest + start, src, limit - start); |
| 324 } | 345 } |
| 325 } | 346 } |
| 326 | 347 |
| 327 U_NAMESPACE_END | 348 U_NAMESPACE_END |
| 328 | 349 |
| 329 #endif // !UCONFIG_NO_COLLATION | 350 #endif // !UCONFIG_NO_COLLATION |
| OLD | NEW |