OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2013-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * collationdatawriter.cpp |
| 7 * |
| 8 * created on: 2013aug06 |
| 9 * created by: Markus W. Scherer |
| 10 */ |
| 11 |
| 12 #include "unicode/utypes.h" |
| 13 |
| 14 #if !UCONFIG_NO_COLLATION |
| 15 |
| 16 #include "unicode/tblcoll.h" |
| 17 #include "unicode/udata.h" |
| 18 #include "unicode/uniset.h" |
| 19 #include "cmemory.h" |
| 20 #include "collationdata.h" |
| 21 #include "collationdatabuilder.h" |
| 22 #include "collationdatareader.h" |
| 23 #include "collationdatawriter.h" |
| 24 #include "collationfastlatin.h" |
| 25 #include "collationsettings.h" |
| 26 #include "collationtailoring.h" |
| 27 #include "uassert.h" |
| 28 #include "ucmndata.h" |
| 29 |
| 30 U_NAMESPACE_BEGIN |
| 31 |
| 32 uint8_t * |
| 33 RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const { |
| 34 if(U_FAILURE(errorCode)) { return NULL; } |
| 35 LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000)); |
| 36 if(buffer.isNull()) { |
| 37 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 38 return NULL; |
| 39 } |
| 40 length = cloneBinary(buffer.getAlias(), 20000, errorCode); |
| 41 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { |
| 42 if(buffer.allocateInsteadAndCopy(length, 0) == NULL) { |
| 43 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 44 return NULL; |
| 45 } |
| 46 errorCode = U_ZERO_ERROR; |
| 47 length = cloneBinary(buffer.getAlias(), length, errorCode); |
| 48 } |
| 49 if(U_FAILURE(errorCode)) { return NULL; } |
| 50 return buffer.orphan(); |
| 51 } |
| 52 |
| 53 int32_t |
| 54 RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &erro
rCode) const { |
| 55 int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1]; |
| 56 return CollationDataWriter::writeTailoring( |
| 57 *tailoring, *settings, indexes, dest, capacity, |
| 58 errorCode); |
| 59 } |
| 60 |
| 61 static const UDataInfo dataInfo = { |
| 62 sizeof(UDataInfo), |
| 63 0, |
| 64 |
| 65 U_IS_BIG_ENDIAN, |
| 66 U_CHARSET_FAMILY, |
| 67 U_SIZEOF_UCHAR, |
| 68 0, |
| 69 |
| 70 { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol" |
| 71 { 4, 0, 0, 0 }, // formatVersion |
| 72 { 6, 3, 0, 0 } // dataVersion |
| 73 }; |
| 74 |
| 75 int32_t |
| 76 CollationDataWriter::writeBase(const CollationData &data, const CollationSetting
s &settings, |
| 77 const void *rootElements, int32_t rootElementsLen
gth, |
| 78 int32_t indexes[], uint8_t *dest, int32_t capacit
y, |
| 79 UErrorCode &errorCode) { |
| 80 return write(TRUE, NULL, |
| 81 data, settings, |
| 82 rootElements, rootElementsLength, |
| 83 indexes, dest, capacity, errorCode); |
| 84 } |
| 85 |
| 86 int32_t |
| 87 CollationDataWriter::writeTailoring(const CollationTailoring &t, const Collation
Settings &settings, |
| 88 int32_t indexes[], uint8_t *dest, int32_t ca
pacity, |
| 89 UErrorCode &errorCode) { |
| 90 return write(FALSE, t.version, |
| 91 *t.data, settings, |
| 92 NULL, 0, |
| 93 indexes, dest, capacity, errorCode); |
| 94 } |
| 95 |
| 96 int32_t |
| 97 CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion, |
| 98 const CollationData &data, const CollationSettings &s
ettings, |
| 99 const void *rootElements, int32_t rootElementsLength, |
| 100 int32_t indexes[], uint8_t *dest, int32_t capacity, |
| 101 UErrorCode &errorCode) { |
| 102 if(U_FAILURE(errorCode)) { return 0; } |
| 103 if(capacity < 0 || (capacity > 0 && dest == NULL)) { |
| 104 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 105 return 0; |
| 106 } |
| 107 |
| 108 // Figure out which data items to write before settling on |
| 109 // the indexes length and writing offsets. |
| 110 // For any data item, we need to write the start and limit offsets, |
| 111 // so the indexes length must be at least index-of-start-offset + 2. |
| 112 int32_t indexesLength; |
| 113 UBool hasMappings; |
| 114 UnicodeSet unsafeBackwardSet; |
| 115 const CollationData *baseData = data.base; |
| 116 |
| 117 int32_t fastLatinVersion; |
| 118 if(data.fastLatinTable != NULL) { |
| 119 fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16; |
| 120 } else { |
| 121 fastLatinVersion = 0; |
| 122 } |
| 123 int32_t fastLatinTableLength = 0; |
| 124 |
| 125 if(isBase) { |
| 126 // For the root collator, we write an even number of indexes |
| 127 // so that we start with an 8-aligned offset. |
| 128 indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1; |
| 129 U_ASSERT(settings.reorderCodesLength == 0); |
| 130 hasMappings = TRUE; |
| 131 unsafeBackwardSet = *data.unsafeBackwardSet; |
| 132 fastLatinTableLength = data.fastLatinTableLength; |
| 133 } else if(baseData == NULL) { |
| 134 hasMappings = FALSE; |
| 135 if(settings.reorderCodesLength == 0) { |
| 136 // only options |
| 137 indexesLength = CollationDataReader::IX_OPTIONS + 1; // no limit of
fset here |
| 138 } else { |
| 139 // only options, reorder codes, and the reorder table |
| 140 indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2; |
| 141 } |
| 142 } else { |
| 143 hasMappings = TRUE; |
| 144 // Tailored mappings, and what else? |
| 145 // Check in ascending order of optional tailoring data items. |
| 146 indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2; |
| 147 if(data.contextsLength != 0) { |
| 148 indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2; |
| 149 } |
| 150 unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->u
nsafeBackwardSet); |
| 151 if(!unsafeBackwardSet.isEmpty()) { |
| 152 indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2; |
| 153 } |
| 154 if(data.fastLatinTable != baseData->fastLatinTable) { |
| 155 fastLatinTableLength = data.fastLatinTableLength; |
| 156 indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2; |
| 157 } |
| 158 } |
| 159 |
| 160 int32_t headerSize; |
| 161 if(isBase) { |
| 162 headerSize = 0; // udata_create() writes the header |
| 163 } else { |
| 164 DataHeader header; |
| 165 header.dataHeader.magic1 = 0xda; |
| 166 header.dataHeader.magic2 = 0x27; |
| 167 uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo)); |
| 168 uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo)); |
| 169 headerSize = (int32_t)sizeof(header); |
| 170 U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes |
| 171 if(hasMappings && data.cesLength != 0) { |
| 172 // Sum of the sizes of the data items which are |
| 173 // not automatically multiples of 8 bytes and which are placed befor
e the CEs. |
| 174 int32_t sum = headerSize + (indexesLength + settings.reorderCodesLen
gth) * 4; |
| 175 if((sum & 7) != 0) { |
| 176 // We need to add padding somewhere so that the 64-bit CEs are 8
-aligned. |
| 177 // We add to the header size here. |
| 178 // Alternatively, we could increment the indexesLength |
| 179 // or add a few bytes to the reorderTable. |
| 180 headerSize += 4; |
| 181 } |
| 182 } |
| 183 header.dataHeader.headerSize = (uint16_t)headerSize; |
| 184 if(headerSize <= capacity) { |
| 185 uprv_memcpy(dest, &header, sizeof(header)); |
| 186 // Write 00 bytes so that the padding is not mistaken for a copyrigh
t string. |
| 187 uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(h
eader)); |
| 188 dest += headerSize; |
| 189 capacity -= headerSize; |
| 190 } else { |
| 191 dest = NULL; |
| 192 capacity = 0; |
| 193 } |
| 194 } |
| 195 |
| 196 indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength; |
| 197 U_ASSERT((settings.options & ~0xffff) == 0); |
| 198 indexes[CollationDataReader::IX_OPTIONS] = |
| 199 data.numericPrimary | fastLatinVersion | settings.options; |
| 200 indexes[CollationDataReader::IX_RESERVED2] = 0; |
| 201 indexes[CollationDataReader::IX_RESERVED3] = 0; |
| 202 |
| 203 // Byte offsets of data items all start from the start of the indexes. |
| 204 // We add the headerSize at the very end. |
| 205 int32_t totalSize = indexesLength * 4; |
| 206 |
| 207 if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) { |
| 208 indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - dat
a.ce32s; |
| 209 } else { |
| 210 indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1; |
| 211 } |
| 212 |
| 213 indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize; |
| 214 totalSize += settings.reorderCodesLength * 4; |
| 215 |
| 216 indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize; |
| 217 if(settings.reorderTable != NULL) { |
| 218 totalSize += 256; |
| 219 } |
| 220 |
| 221 indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize; |
| 222 if(hasMappings) { |
| 223 UErrorCode errorCode2 = U_ZERO_ERROR; |
| 224 int32_t length; |
| 225 if(totalSize < capacity) { |
| 226 length = utrie2_serialize(data.trie, dest + totalSize, |
| 227 capacity - totalSize, &errorCode2); |
| 228 } else { |
| 229 length = utrie2_serialize(data.trie, NULL, 0, &errorCode2); |
| 230 } |
| 231 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { |
| 232 errorCode = errorCode2; |
| 233 return 0; |
| 234 } |
| 235 // The trie size should be a multiple of 8 bytes due to the way |
| 236 // compactIndex2(UNewTrie2 *trie) currently works. |
| 237 U_ASSERT((length & 7) == 0); |
| 238 totalSize += length; |
| 239 } |
| 240 |
| 241 indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize; |
| 242 indexes[CollationDataReader::IX_CES_OFFSET] = totalSize; |
| 243 if(hasMappings && data.cesLength != 0) { |
| 244 U_ASSERT(((headerSize + totalSize) & 7) == 0); |
| 245 totalSize += data.cesLength * 8; |
| 246 } |
| 247 |
| 248 indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize; |
| 249 indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize; |
| 250 if(hasMappings) { |
| 251 totalSize += data.ce32sLength * 4; |
| 252 } |
| 253 |
| 254 indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize; |
| 255 totalSize += rootElementsLength * 4; |
| 256 |
| 257 indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize; |
| 258 if(hasMappings) { |
| 259 totalSize += data.contextsLength * 2; |
| 260 } |
| 261 |
| 262 indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize; |
| 263 if(hasMappings && !unsafeBackwardSet.isEmpty()) { |
| 264 UErrorCode errorCode2 = U_ZERO_ERROR; |
| 265 int32_t length; |
| 266 if(totalSize < capacity) { |
| 267 uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize); |
| 268 length = unsafeBackwardSet.serialize( |
| 269 p, (capacity - totalSize) / 2, errorCode2); |
| 270 } else { |
| 271 length = unsafeBackwardSet.serialize(NULL, 0, errorCode2); |
| 272 } |
| 273 if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { |
| 274 errorCode = errorCode2; |
| 275 return 0; |
| 276 } |
| 277 totalSize += length * 2; |
| 278 } |
| 279 |
| 280 indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize; |
| 281 totalSize += fastLatinTableLength * 2; |
| 282 |
| 283 indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize; |
| 284 if(isBase) { |
| 285 totalSize += data.scriptsLength * 2; |
| 286 } |
| 287 |
| 288 indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize; |
| 289 if(isBase) { |
| 290 totalSize += 256; |
| 291 } |
| 292 |
| 293 indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize; |
| 294 indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize; |
| 295 |
| 296 if(totalSize > capacity) { |
| 297 errorCode = U_BUFFER_OVERFLOW_ERROR; |
| 298 return headerSize + totalSize; |
| 299 } |
| 300 |
| 301 uprv_memcpy(dest, indexes, indexesLength * 4); |
| 302 copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, settings.reo
rderCodes, dest); |
| 303 copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reo
rderTable, dest); |
| 304 // The trie has already been serialized into the dest buffer. |
| 305 copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest); |
| 306 copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest); |
| 307 copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements
, dest); |
| 308 copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, de
st); |
| 309 // The unsafeBackwardSet has already been serialized into the dest buffer. |
| 310 copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fast
LatinTable, dest); |
| 311 copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, data.scripts, dest
); |
| 312 copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.co
mpressibleBytes, dest); |
| 313 |
| 314 return headerSize + totalSize; |
| 315 } |
| 316 |
| 317 void |
| 318 CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex, |
| 319 const void *src, uint8_t *dest) { |
| 320 int32_t start = indexes[startIndex]; |
| 321 int32_t limit = indexes[startIndex + 1]; |
| 322 if(start < limit) { |
| 323 uprv_memcpy(dest + start, src, limit - start); |
| 324 } |
| 325 } |
| 326 |
| 327 U_NAMESPACE_END |
| 328 |
| 329 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |