OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 1999-2009, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * |
| 7 * ucnv_cnv.h: |
| 8 * Definitions for converter implementations. |
| 9 * |
| 10 * Modification History: |
| 11 * |
| 12 * Date Name Description |
| 13 * 05/09/00 helena Added implementation to handle fallback mappings. |
| 14 * 06/29/2000 helena Major rewrite of the callback APIs. |
| 15 */ |
| 16 |
| 17 #ifndef UCNV_CNV_H |
| 18 #define UCNV_CNV_H |
| 19 |
| 20 #include "unicode/utypes.h" |
| 21 |
| 22 #if !UCONFIG_NO_CONVERSION |
| 23 |
| 24 #include "unicode/ucnv.h" |
| 25 #include "unicode/ucnv_err.h" |
| 26 #include "unicode/uset.h" |
| 27 #include "uset_imp.h" |
| 28 |
| 29 U_CDECL_BEGIN |
| 30 |
| 31 /* this is used in fromUnicode DBCS tables as an "unassigned" marker */ |
| 32 #define missingCharMarker 0xFFFF |
| 33 |
| 34 /* |
| 35 * #define missingUCharMarker 0xfffe |
| 36 * |
| 37 * commented out because there are actually two values used in toUnicode tables: |
| 38 * U+fffe "unassigned" |
| 39 * U+ffff "illegal" |
| 40 */ |
| 41 |
| 42 /** Forward declaration, see ucnv_bld.h */ |
| 43 struct UConverterSharedData; |
| 44 typedef struct UConverterSharedData UConverterSharedData; |
| 45 |
| 46 /* function types for UConverterImpl ---------------------------------------- */ |
| 47 |
| 48 /* Arguments struct passed to UConverterLoad and UConverterOpen implementations and to ucnv_load() */ |
| 49 typedef struct { |
| 50 int32_t size; /* sizeof(UConverterLoadArgs) */ |
| 51 int32_t nestedLoads; /* count of nested ucnv_load() calls */ |
| 52 UBool onlyTestIsLoadable; /* input: don't actually load */ |
| 53 UBool reserved0; /* reserved - keeps the following pointers well aligned
*/ |
| 54 int16_t reserved; /* reserved - keeps the following pointers well aligned
*/ |
| 55 uint32_t options; /* NOTE(review): presumably converter option bits - confirm against ucnv_bld.h */ |
| 56 const char *pkg, *name, *locale; /* NOTE(review): presumably data package, converter name, and locale strings - confirm with callers */ |
| 57 } UConverterLoadArgs; |
| 58 |
| 59 typedef void (*UConverterLoad) (UConverterSharedData *sharedData, |
| 60 UConverterLoadArgs *pArgs, |
| 61 const uint8_t *raw, UErrorCode *pErrorCode); |
| 62 typedef void (*UConverterUnload) (UConverterSharedData *sharedData); |
| 63 |
| 64 typedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErr
orCode *pErrorCode); |
| 65 typedef void (*UConverterClose) (UConverter *cnv); /* also called after open() fails, if present (see the UConverterImpl notes) */ |
| 66 |
| 67 typedef enum UConverterResetChoice { |
| 68 UCNV_RESET_BOTH, |
| 69 UCNV_RESET_TO_UNICODE, |
| 70 UCNV_RESET_FROM_UNICODE |
| 71 } UConverterResetChoice; |
| 72 |
| 73 typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice); |
| 74 |
| 75 /* |
| 76 * Converter implementation function(s) for ucnv_toUnicode(). |
| 77 * If the toUnicodeWithOffsets function pointer is NULL, |
| 78 * then the toUnicode function will be used and the offsets will be set to -1. |
| 79 * |
| 80 * Must maintain state across buffers. Use toUBytes[toULength] for partial input |
| 81 * sequences; it will be checked in ucnv.c at the end of the input stream |
| 82 * to detect truncated input. |
| 83 * Some converters may need additional detection and may then set U_TRUNCATED_CH
AR_FOUND. |
| 84 * |
| 85 * The toUnicodeWithOffsets must write exactly as many offset values as target |
| 86 * units. Write offset values of -1 for when the source index corresponding to |
| 87 * the output unit is not known (e.g., the character started in an earlier buffe
r). |
| 88 * The pArgs->offsets pointer need not be moved forward. |
| 89 * |
| 90 * At function return, either one of the following conditions must be true: |
| 91 * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit |
| 92 * - another error code with toUBytes[toULength] set to the offending input |
| 93 * - no error, and the source is consumed: source==sourceLimit |
| 94 * |
| 95 * The ucnv.c code will handle the end of the input |
| 96 * (reset, and truncation detection) and callbacks. |
| 97 */ |
| 98 typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *); |
| 99 |
| 100 /* |
| 101 * Same rules as for UConverterToUnicode. |
| 102 * A lead surrogate is kept in fromUChar32 across buffers, and if an error |
| 103 * occurs, then the offending input code point must be put into fromUChar32 |
| 104 * as well. |
| 105 */ |
| 106 typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *
); |
| 107 |
| 108 /* |
| 109 * Converter implementation function for ucnv_convertEx(), for direct conversion |
| 110 * between two charsets without pivoting through UTF-16. |
| 111 * The rules are the same as for UConverterToUnicode and UConverterFromUnicode. |
| 112 * In addition, |
| 113 * - The toUnicode side must behave and keep state exactly like the |
| 114 * UConverterToUnicode implementation for the same source charset. |
| 115 * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back |
| 116 * to pivoting. When this function is called, the conversion framework makes |
| 117 * sure that this warning is not set on input. |
| 118 * - Continuing a partial match and flushing the toUnicode replay buffer |
| 119 * are handled by pivoting, using the toUnicode and fromUnicode functions. |
| 120 */ |
| 121 typedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs, |
| 122 UConverterToUnicodeArgs *pToUArgs, |
| 123 UErrorCode *pErrorCode); |
| 124 |
| 125 /* |
| 126 * Converter implementation function for ucnv_getNextUChar(). |
| 127 * If the function pointer is NULL, then the toUnicode function will be used. |
| 128 * |
| 129 * Will be called at a character boundary (toULength==0). |
| 130 * May return with |
| 131 * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input |
| 132 * (the return value will be ignored) |
| 133 * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR
!) |
| 134 * with toUBytes[toULength] set to the offending input |
| 135 * (the return value will be ignored) |
| 136 * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer, |
| 137 * to indicate that the ucnv.c code shall call the toUnicode function instead |
| 138 * - return a real code point result |
| 139 * |
| 140 * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be con
sumed. |
| 141 * |
| 142 * The ucnv.c code will handle the end of the input (reset) |
| 143 * (except for truncation detection!) and callbacks. |
| 144 */ |
| 145 typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode
*); |
| 146 |
| 147 typedef void (*UConverterGetStarters)(const UConverter* converter, |
| 148 UBool starters[256], |
| 149 UErrorCode *pErrorCode); |
| 150 |
| 151 /* If this function pointer is null or if the function returns null |
| 152 * the name field in static data struct should be returned by |
| 153 * ucnv_getName() API function |
| 154 */ |
| 155 typedef const char * (*UConverterGetName) (const UConverter *cnv); |
| 156 |
| 157 /** |
| 158 * Write the codepage substitution character. |
| 159 * If this function is not set, then ucnv_cbFromUWriteSub() writes |
| 160 * the substitution character from UConverter. |
| 161 * For stateful converters, it is typically necessary to handle this |
| 162 * specifically for the converter in order to properly maintain the state. |
| 163 */ |
| 164 typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t of
fsetIndex, UErrorCode *pErrorCode); |
| 165 |
| 166 /** |
| 167 * For converter-specific safeClone processing |
| 168 * If this function is not set, then ucnv_safeClone assumes that the converter h
as no private data that changes |
| 169 * after the converter is done opening. |
| 170 * If this function is set, then it is called just after a memcpy() of |
| 171 * converter data to the new, empty converter, and is expected to set up |
| 172 * the initial state of the converter. It is not expected to increment the |
| 173 * reference counts of the standard data types such as the shared data. |
| 174 */ |
| 175 typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, |
| 176 void *stackBuffer, |
| 177 int32_t *pBufferSize, |
| 178 UErrorCode *status); |
| 179 |
| 180 /** |
| 181 * Filters for some ucnv_getUnicodeSet() implementation code. |
| 182 */ |
| 183 typedef enum UConverterSetFilter { |
| 184 UCNV_SET_FILTER_NONE, |
| 185 UCNV_SET_FILTER_DBCS_ONLY, |
| 186 UCNV_SET_FILTER_2022_CN, |
| 187 UCNV_SET_FILTER_SJIS, |
| 188 UCNV_SET_FILTER_GR94DBCS, |
| 189 UCNV_SET_FILTER_HZ, |
| 190 UCNV_SET_FILTER_COUNT /* number of filters above; must stay the last enumerator */ |
| 191 } UConverterSetFilter; |
| 192 |
| 193 /** |
| 194 * Fills the set of Unicode code points that can be converted by an ICU converte
r. |
| 195 * The API function ucnv_getUnicodeSet() clears the USet before calling |
| 196 * the converter's getUnicodeSet() implementation; the converter should only |
| 197 * add the appropriate code points to allow recursive use. |
| 198 * For example, the ISO-2022-JP converter will call each subconverter's |
| 199 * getUnicodeSet() implementation to consecutively add code points to |
| 200 * the same USet, which will result in a union of the sets of all subconverters. |
| 201 * |
| 202 * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. |
| 203 */ |
| 204 typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, |
| 205 const USetAdder *sa, |
| 206 UConverterUnicodeSet which, |
| 207 UErrorCode *pErrorCode); |
| 208 |
| 209 UBool CONVERSION_U_SUCCESS (UErrorCode err); |
| 210 |
| 211 /** |
| 212 * UConverterImpl contains all the data and functions for a converter type. |
| 213 * Its function pointers work much like a C++ vtable. |
| 214 * Many converter types need to define only a subset of the functions; |
| 215 * when a function pointer is NULL, then a default action will be performed. |
| 216 * |
| 217 * Every converter type must implement toUnicode, fromUnicode, and getNextUChar, |
| 218 * otherwise the converter may crash. |
| 219 * Every converter type that has variable-length codepage sequences should |
| 220 * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for |
| 221 * correct offset handling. |
| 222 * All other functions may or may not be implemented - it depends only on |
| 223 * whether the converter type needs them. |
| 224 * |
| 225 * When open() fails, then close() will be called, if present. |
| 226 */ |
| 227 struct UConverterImpl { |
| 228 UConverterType type; |
| 229 |
| 230 UConverterLoad load; |
| 231 UConverterUnload unload; |
| 232 |
| 233 UConverterOpen open; |
| 234 UConverterClose close; /* also called when open() fails, if present */ |
| 235 UConverterReset reset; |
| 236 |
| 237 UConverterToUnicode toUnicode; /* required */ |
| 238 UConverterToUnicode toUnicodeWithOffsets; /* if NULL, toUnicode is used and the offsets are set to -1 */ |
| 239 UConverterFromUnicode fromUnicode; /* required */ |
| 240 UConverterFromUnicode fromUnicodeWithOffsets; /* should be set for variable-length codepage sequences */ |
| 241 UConverterGetNextUChar getNextUChar; /* NOTE(review): listed as required above, but the UConverterGetNextUChar notes say NULL falls back to toUnicode - confirm */ |
| 242 |
| 243 UConverterGetStarters getStarters; |
| 244 UConverterGetName getName; /* if NULL or returning NULL, ucnv_getName() uses the static data name */ |
| 245 UConverterWriteSub writeSub; /* if NULL, ucnv_cbFromUWriteSub() writes the UConverter's substitution character */ |
| 246 UConverterSafeClone safeClone; /* if NULL, ucnv_safeClone assumes no private data changes after opening */ |
| 247 UConverterGetUnicodeSet getUnicodeSet; |
| 248 |
| 249 UConverterConvert toUTF8; /* direct conversion for ucnv_convertEx(), no UTF-16 pivot */ |
| 250 UConverterConvert fromUTF8; /* direct conversion for ucnv_convertEx(), no UTF-16 pivot */ |
| 251 }; |
| 252 |
| 253 extern const UConverterSharedData /* declarations only; the objects are defined outside this header */ |
| 254 _MBCSData, _Latin1Data, |
| 255 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, |
| 256 _ISO2022Data, |
| 257 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, |
| 258 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19
, |
| 259 _HZData,_ISCIIData, _SCSUData, _ASCIIData, |
| 260 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData; |
| 261 |
| 262 U_CDECL_END |
| 263 |
| 264 /** Always use fallbacks from codepage to Unicode; both macros ignore their argument and expand to TRUE */ |
| 265 #define TO_U_USE_FALLBACK(useFallback) TRUE |
| 266 #define UCNV_TO_U_USE_FALLBACK(cnv) TRUE |
| 267 |
| 268 /** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-
use code points (U+E000..U+F8FF and U+F0000..U+10FFFF). IS_PRIVATE_USE evaluates c twice, so pass a side-effect-free expression. */ |
| 269 #define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf00
00)<0x20000) |
| 270 #define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c)) |
| 271 #define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback,
c) |
| 272 |
| 273 /** |
| 274 * Magic number for ucnv_getNextUChar(), returned by a |
| 275 * getNextUChar() implementation to indicate to use the converter's toUnicode() |
| 276 * instead of the native function; it must be returned without moving the source pointer. |
| 277 * @internal |
| 278 */ |
| 279 #define UCNV_GET_NEXT_UCHAR_USE_TO_U -9 |
| 280 |
| 281 U_CFUNC void /* parameter list and return type match the UConverterGetUnicodeSet function type */ |
| 282 ucnv_getCompleteUnicodeSet(const UConverter *cnv, |
| 283 const USetAdder *sa, |
| 284 UConverterUnicodeSet which, |
| 285 UErrorCode *pErrorCode); |
| 286 |
| 287 U_CFUNC void /* parameter list and return type match the UConverterGetUnicodeSet function type */ |
| 288 ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, |
| 289 const USetAdder *sa, |
| 290 UConverterUnicodeSet which, |
| 291 UErrorCode *pErrorCode); |
| 292 |
| 293 U_CFUNC void |
| 294 ucnv_fromUWriteBytes(UConverter *cnv, |
| 295 const char *bytes, int32_t length, |
| 296 char **target, const char *targetLimit, |
| 297 int32_t **offsets, |
| 298 int32_t sourceIndex, |
| 299 UErrorCode *pErrorCode); |
| 300 U_CFUNC void |
| 301 ucnv_toUWriteUChars(UConverter *cnv, |
| 302 const UChar *uchars, int32_t length, |
| 303 UChar **target, const UChar *targetLimit, |
| 304 int32_t **offsets, |
| 305 int32_t sourceIndex, |
| 306 UErrorCode *pErrorCode); |
| 307 |
| 308 U_CFUNC void |
| 309 ucnv_toUWriteCodePoint(UConverter *cnv, |
| 310 UChar32 c, |
| 311 UChar **target, const UChar *targetLimit, |
| 312 int32_t **offsets, |
| 313 int32_t sourceIndex, |
| 314 UErrorCode *pErrorCode); |
| 315 |
| 316 #endif /* !UCONFIG_NO_CONVERSION */ |
| 317 |
| 318 #endif /* UCNV_CNV_H */ |
OLD | NEW |