OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * |
| 4 * Copyright (C) 1999-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************/ |
| 8 |
| 9 |
| 10 /*------------------------------------------------------------------------------ |
| 11 * |
| 12 * UCommonData An abstract interface for dealing with ICU Common Data Files. |
| 13 * ICU Common Data Files are a grouping of a number of individual |
| 14 * data items (resources, converters, tables, anything) into a |
| 15 * single file or dll. The combined format includes a table of |
| 16 * contents for locating the individual items by name. |
| 17 * |
| 18 * Two formats for the table of contents are supported, which is |
| 19 * why there is an abstract interface involved. |
| 20 * |
| 21 */ |
| 22 |
| 23 #include "unicode/utypes.h" |
| 24 #include "unicode/udata.h" |
| 25 #include "cstring.h" |
| 26 #include "ucmndata.h" |
| 27 #include "udatamem.h" |
| 28 |
| 29 #if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) |
| 30 # include <stdio.h> |
| 31 #endif |
| 32 |
| 33 U_CFUNC uint16_t |
| 34 udata_getHeaderSize(const DataHeader *udh) { |
| 35 if(udh==NULL) { |
| 36 return 0; |
| 37 } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { |
| 38 /* same endianness */ |
| 39 return udh->dataHeader.headerSize; |
| 40 } else { |
| 41 /* opposite endianness */ |
| 42 uint16_t x=udh->dataHeader.headerSize; |
| 43 return (uint16_t)((x<<8)|(x>>8)); |
| 44 } |
| 45 } |
| 46 |
| 47 U_CFUNC uint16_t |
| 48 udata_getInfoSize(const UDataInfo *info) { |
| 49 if(info==NULL) { |
| 50 return 0; |
| 51 } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { |
| 52 /* same endianness */ |
| 53 return info->size; |
| 54 } else { |
| 55 /* opposite endianness */ |
| 56 uint16_t x=info->size; |
| 57 return (uint16_t)((x<<8)|(x>>8)); |
| 58 } |
| 59 } |
| 60 |
| 61 /*-----------------------------------------------------------------------------* |
| 62 * * |
| 63 * Pointer TOCs. TODO: This form of table-of-contents should be removed * |
| 64 * because DLLs must be relocated on loading to correct the * |
| 65 * pointer values and this operation makes shared memory * |
| 66 * mapping of the data much less likely to work. * |
| 67 * * |
| 68 *-----------------------------------------------------------------------------*/ |
| 69 typedef struct { |
| 70 const char *entryName; |
| 71 const DataHeader *pHeader; |
| 72 } PointerTOCEntry; |
| 73 |
| 74 |
| 75 typedef struct { |
| 76 uint32_t count; |
| 77 uint32_t reserved; |
| 78 PointerTOCEntry entry[2]; /* Actual size is from count. */ |
| 79 } PointerTOC; |
| 80 |
| 81 |
| 82 /* definition of OffsetTOC struct types moved to ucmndata.h */ |
| 83 |
| 84 /*-----------------------------------------------------------------------------* |
| 85 * * |
| 86 * entry point lookup implementations * |
| 87 * * |
| 88 *-----------------------------------------------------------------------------*/ |
| 89 static uint32_t offsetTOCEntryCount(const UDataMemory *pData) { |
| 90 int32_t retVal=0; |
| 91 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; |
| 92 if (toc != NULL) { |
| 93 retVal = toc->count; |
| 94 } |
| 95 return retVal; |
| 96 } |
| 97 |
| 98 |
| 99 static const DataHeader * |
| 100 offsetTOCLookupFn(const UDataMemory *pData, |
| 101 const char *tocEntryName, |
| 102 int32_t *pLength, |
| 103 UErrorCode *pErrorCode) { |
| 104 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; |
| 105 if(toc!=NULL) { |
| 106 const char *base=(const char *)toc; |
| 107 uint32_t start, limit, number, lastNumber; |
| 108 int32_t strResult; |
| 109 const UDataOffsetTOCEntry *entry; |
| 110 |
| 111 /* perform a binary search for the data in the common data's table of co
ntents */ |
| 112 #if defined (UDATA_DEBUG_DUMP) |
| 113 /* list the contents of the TOC each time .. not recommended */ |
| 114 for(start=0;start<toc->count;start++) { |
| 115 fprintf(stderr, "\tx%d: %s\n", start, &base[toc->entry[start].nameOffs
et]); |
| 116 } |
| 117 #endif |
| 118 |
| 119 start=0; |
| 120 limit=toc->count; /* number of names in this table of contents *
/ |
| 121 lastNumber=limit; |
| 122 entry=toc->entry; |
| 123 for (;;) { |
| 124 number = (start+limit)/2; |
| 125 if (lastNumber == number) { /* Have we moved? */ |
| 126 break; /* We haven't moved, and it wasn't found; */ |
| 127 /* or the empty stub common data library was used during
build. */ |
| 128 } |
| 129 lastNumber = number; |
| 130 strResult = uprv_strcmp(tocEntryName, base+entry[number].nameOffset)
; |
| 131 if(strResult<0) { |
| 132 limit=number; |
| 133 } else if (strResult>0) { |
| 134 start=number; |
| 135 } |
| 136 else { |
| 137 /* found it */ |
| 138 #ifdef UDATA_DEBUG |
| 139 fprintf(stderr, "%s: Found.\n", tocEntryName); |
| 140 #endif |
| 141 entry += number; /* Alias the entry to the current entry. */ |
| 142 if((number+1) < toc->count) { |
| 143 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset
); |
| 144 } else { |
| 145 *pLength = -1; |
| 146 } |
| 147 return (const DataHeader *)(base+entry->dataOffset); |
| 148 } |
| 149 } |
| 150 #ifdef UDATA_DEBUG |
| 151 fprintf(stderr, "%s: Not found.\n", tocEntryName); |
| 152 #endif |
| 153 return NULL; |
| 154 } else { |
| 155 #ifdef UDATA_DEBUG |
| 156 fprintf(stderr, "returning header\n"); |
| 157 #endif |
| 158 |
| 159 return pData->pHeader; |
| 160 } |
| 161 } |
| 162 |
| 163 |
| 164 static uint32_t pointerTOCEntryCount(const UDataMemory *pData) { |
| 165 const PointerTOC *toc = (PointerTOC *)pData->toc; |
| 166 return (uint32_t)((toc != NULL) ? (toc->count) : 0); |
| 167 } |
| 168 |
| 169 |
| 170 static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData, |
| 171 const char *name, |
| 172 int32_t *pLength, |
| 173 UErrorCode *pErrorCode) { |
| 174 if(pData->toc!=NULL) { |
| 175 const PointerTOC *toc = (PointerTOC *)pData->toc; |
| 176 uint32_t start, limit, number, lastNumber; |
| 177 int32_t strResult; |
| 178 |
| 179 #if defined (UDATA_DEBUG_DUMP) |
| 180 /* list the contents of the TOC each time .. not recommended */ |
| 181 for(start=0;start<toc->count;start++) { |
| 182 fprintf(stderr, "\tx%d: %s\n", start, toc->entry[start].entryName); |
| 183 } |
| 184 #endif |
| 185 |
| 186 /* perform a binary search for the data in the common data's table of co
ntents */ |
| 187 start=0; |
| 188 limit=toc->count; |
| 189 lastNumber=limit; |
| 190 |
| 191 for (;;) { |
| 192 number = (start+limit)/2; |
| 193 if (lastNumber == number) { /* Have we moved? */ |
| 194 break; /* We haven't moved, and it wasn't found, */ |
| 195 /* or the empty stub common data library was used during
build. */ |
| 196 } |
| 197 lastNumber = number; |
| 198 strResult = uprv_strcmp(name, toc->entry[number].entryName); |
| 199 if(strResult<0) { |
| 200 limit=number; |
| 201 } else if (strResult>0) { |
| 202 start=number; |
| 203 } |
| 204 else { |
| 205 /* found it */ |
| 206 #ifdef UDATA_DEBUG |
| 207 fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); |
| 208 #endif |
| 209 *pLength=-1; |
| 210 return UDataMemory_normalizeDataPointer(toc->entry[number].pHead
er); |
| 211 } |
| 212 } |
| 213 #ifdef UDATA_DEBUG |
| 214 fprintf(stderr, "%s: Not found.\n", name); |
| 215 #endif |
| 216 return NULL; |
| 217 } else { |
| 218 return pData->pHeader; |
| 219 } |
| 220 } |
| 221 |
| 222 static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCoun
t}; |
| 223 static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCou
nt}; |
| 224 |
| 225 |
| 226 |
| 227 /*----------------------------------------------------------------------* |
| 228 * * |
| 229 * checkCommonData Validate the format of a common data file. * |
| 230 * Fill in the virtual function ptr based on TOC type * |
| 231 * If the data is invalid, close the UDataMemory * |
| 232 * and set the appropriate error code. * |
| 233 * * |
| 234 *----------------------------------------------------------------------*/ |
| 235 U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { |
| 236 if (U_FAILURE(*err)) { |
| 237 return; |
| 238 } |
| 239 |
| 240 if(!(udm->pHeader->dataHeader.magic1==0xda && |
| 241 udm->pHeader->dataHeader.magic2==0x27 && |
| 242 udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && |
| 243 udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) |
| 244 ) { |
| 245 /* header not valid */ |
| 246 *err=U_INVALID_FORMAT_ERROR; |
| 247 } |
| 248 else if (udm->pHeader->info.dataFormat[0]==0x43 && |
| 249 udm->pHeader->info.dataFormat[1]==0x6d && |
| 250 udm->pHeader->info.dataFormat[2]==0x6e && |
| 251 udm->pHeader->info.dataFormat[3]==0x44 && |
| 252 udm->pHeader->info.formatVersion[0]==1 |
| 253 ) { |
| 254 /* dataFormat="CmnD" */ |
| 255 udm->vFuncs = &CmnDFuncs; |
| 256 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); |
| 257 } |
| 258 else if(udm->pHeader->info.dataFormat[0]==0x54 && |
| 259 udm->pHeader->info.dataFormat[1]==0x6f && |
| 260 udm->pHeader->info.dataFormat[2]==0x43 && |
| 261 udm->pHeader->info.dataFormat[3]==0x50 && |
| 262 udm->pHeader->info.formatVersion[0]==1 |
| 263 ) { |
| 264 /* dataFormat="ToCP" */ |
| 265 udm->vFuncs = &ToCPFuncs; |
| 266 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); |
| 267 } |
| 268 else { |
| 269 /* dataFormat not recognized */ |
| 270 *err=U_INVALID_FORMAT_ERROR; |
| 271 } |
| 272 |
| 273 if (U_FAILURE(*err)) { |
| 274 /* If the data is no good and we memory-mapped it ourselves, |
| 275 * close the memory mapping so it doesn't leak. Note that this has |
| 276 * no effect on non-memory mapped data, other than clearing fields in u
dm. |
| 277 */ |
| 278 udata_close(udm); |
| 279 } |
| 280 } |
| 281 |
| 282 /* |
| 283 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package |
| 284 * header but not its sub-items. |
| 285 * This function will be needed for automatic runtime swapping. |
| 286 * Sub-items should not be swapped to limit the swapping to the parts of the |
| 287 * package that are actually used. |
| 288 * |
| 289 * Since lengths of items are implicit in the order and offsets of their |
| 290 * ToC entries, and since offsets are relative to the start of the ToC, |
| 291 * a swapped version may need to generate a different data structure |
| 292 * with pointers to the original data items and with their lengths |
| 293 * (-1 for the last one if it is not known), and maybe even pointers to the |
| 294 * swapped versions of the items. |
| 295 * These pointers to swapped versions would establish a cache; |
| 296 * instead, each open data item could simply own the storage for its swapped |
| 297 * data. This fits better with the current design. |
| 298 * |
| 299 * markus 2003sep18 Jitterbug 2235 |
| 300 */ |
OLD | NEW |