Index: icu46/source/common/ucnv.c |
=================================================================== |
--- icu46/source/common/ucnv.c (revision 0) |
+++ icu46/source/common/ucnv.c (revision 0) |
@@ -0,0 +1,2894 @@ |
+/* |
+****************************************************************************** |
+* |
+* Copyright (C) 1998-2010, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+****************************************************************************** |
+* |
+* ucnv.c: |
+* Implements APIs for the ICU's codeset conversion library; |
+* mostly calls through internal functions; |
+* created by Bertrand A. Damiba |
+* |
+* Modification History: |
+* |
+* Date Name Description |
+* 04/04/99 helena Fixed internal header inclusion. |
+* 05/09/00 helena Added implementation to handle fallback mappings. |
+* 06/20/2000 helena OS/400 port changes; mostly typecast. |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_CONVERSION |
+ |
+#include "unicode/ustring.h" |
+#include "unicode/ucnv.h" |
+#include "unicode/ucnv_err.h" |
+#include "unicode/uset.h" |
+#include "putilimp.h" |
+#include "cmemory.h" |
+#include "cstring.h" |
+#include "uassert.h" |
+#include "utracimp.h" |
+#include "ustr_imp.h" |
+#include "ucnv_imp.h" |
+#include "ucnv_cnv.h" |
+#include "ucnv_bld.h" |
+ |
+/* size of intermediate and preflighting buffers in ucnv_convert() */ |
+#define CHUNK_SIZE 1024 |
+ |
+typedef struct UAmbiguousConverter { /* one entry of the ambiguousConverters[] table */ |
+ const char *name; /* converter name; the values used are listed in ambiguousConverters[] */ |
+ const UChar variant5c; /* presumably the character this converter maps byte 0x5c to -- TODO confirm with the code that reads it */ |
+} UAmbiguousConverter; |
+ |
+static const UAmbiguousConverter ambiguousConverters[]={ /* { name, variant5c } pairs */ |
+ { "ibm-897_P100-1995", 0xa5 }, /* 0xa5 is U+00A5 YEN SIGN */ |
+ { "ibm-942_P120-1999", 0xa5 }, |
+ { "ibm-943_P130-1999", 0xa5 }, |
+ { "ibm-946_P100-1995", 0xa5 }, |
+ { "ibm-33722_P120-1999", 0xa5 }, |
+ { "ibm-1041_P100-1995", 0xa5 }, |
+ /*{ "ibm-54191_P100-2006", 0xa5 },*/ |
+ /*{ "ibm-62383_P100-2007", 0xa5 },*/ |
+ /*{ "ibm-891_P100-1995", 0x20a9 },*/ |
+ { "ibm-944_P100-1995", 0x20a9 }, /* 0x20a9 is U+20A9 WON SIGN */ |
+ { "ibm-949_P110-1999", 0x20a9 }, |
+ { "ibm-1363_P110-1997", 0x20a9 }, |
+ { "ISO_2022,locale=ko,version=0", 0x20a9 }, |
+ { "ibm-1088_P100-1995", 0x20a9 } |
+}; |
+ |
+/* |
+ * Opens a converter by name: checks the error-code argument and then calls |
+ * ucnv_createConverter() with NULL as its first argument. |
+ * Returns NULL if err is NULL or already holds a failure code. |
+ */ |
+U_CAPI UConverter* U_EXPORT2 |
+ucnv_open(const char *name, UErrorCode *err) |
+{ |
+    if (err != NULL && U_SUCCESS(*err)) { |
+        /* err is usable and clean: delegate everything else */ |
+        return ucnv_createConverter(NULL, name, err); |
+    } |
+    return NULL; |
+} |
+ |
+/* Thin wrapper: forwards all arguments unchanged to ucnv_createConverterFromPackage(). */ |
+U_CAPI UConverter* U_EXPORT2 |
+ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err) { |
+    return ucnv_createConverterFromPackage(packageName, converterName, err); |
+} |
+ |
+/* |
+ * Opens a converter whose name is a UChar string: the name is copied into a |
+ * local char buffer with u_austrcpy() and handed to ucnv_open(). A NULL name is |
+ * forwarded as NULL; a name too long for the buffer sets U_ILLEGAL_ARGUMENT_ERROR. |
+ */ |
+U_CAPI UConverter* U_EXPORT2 |
+ucnv_openU(const UChar *name, UErrorCode *err) |
+{ |
+    char narrowName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
+ |
+    if (err == NULL || U_FAILURE(*err)) { return NULL; } |
+    if (name == NULL) { return ucnv_open(NULL, err); } |
+    if (u_strlen(name) < UCNV_MAX_CONVERTER_NAME_LENGTH) { |
+        return ucnv_open(u_austrcpy(narrowName, name), err); |
+    } |
+    *err = U_ILLEGAL_ARGUMENT_ERROR; /* name plus terminator would not fit */ |
+    return NULL; |
+} |
+ |
+/* |
+ * Writes the name prefix that corresponds to a UConverterPlatform value. |
+ * @param platformString output buffer; receives "ibm-" for UCNV_IBM and an |
+ *                       empty string for every other value |
+ * @param pltfrm the platform selector |
+ * @return the number of chars written, not counting the terminating NUL |
+ *         (4 for UCNV_IBM, otherwise 0) |
+ */ |
+static int32_t |
+ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) |
+{ |
+    static const char ibmPrefix[] = "ibm-"; |
+ |
+    if (pltfrm != UCNV_IBM) { |
+        /* UCNV_UNKNOWN and anything else: default to empty string */ |
+        platformString[0] = 0; |
+        return 0; |
+    } |
+    uprv_strcpy(platformString, ibmPrefix); |
+    return (int32_t)(sizeof(ibmPrefix) - 1); |
+} |
+ |
+/* |
+ * Builds a converter name of the form <platform prefix><codepage in decimal> |
+ * and opens it through ucnv_createConverter(). |
+ * Returns NULL if err is NULL or already indicates a failure. |
+ */ |
+U_CAPI UConverter* U_EXPORT2 |
+ucnv_openCCSID(int32_t codepage, UConverterPlatform platform, UErrorCode *err) |
+{ |
+    char ccsidName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
+    char *digits; |
+ |
+    if (err == NULL || U_FAILURE(*err)) { |
+        return NULL; |
+    } |
+    /* ucnv_copyPlatformString() writes "ibm-" or an empty string; the digits follow it */ |
+    digits = ccsidName + ucnv_copyPlatformString(ccsidName, platform); |
+    T_CString_integerToString(digits, codepage, 10); |
+    return ucnv_createConverter(NULL, ccsidName, err); |
+} |
+ |
+/* |
+ * Clones cnv into caller-provided memory (stackBuffer, *pBufferSize bytes) or, if that |
+ * is NULL or too small, into uprv_malloc() memory (U_SAFECLONE_ALLOCATED_WARNING). |
+ * If *pBufferSize<=0 on entry, only the needed size is stored there and NULL is returned. |
+ * Returns NULL on failure; allocation failures set U_MEMORY_ALLOCATION_ERROR. |
+ */ |
+U_CAPI UConverter* U_EXPORT2 |
+ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) |
+{ |
+    UConverter *localConverter, *allocatedConverter, *cloneMemory; |
+    int32_t bufferSizeNeeded; |
+    char *stackBufferChars = (char *)stackBuffer; |
+    UErrorCode cbErr; |
+    UConverterToUnicodeArgs toUArgs = { |
+        sizeof(UConverterToUnicodeArgs), |
+            TRUE, |
+            NULL, |
+            NULL, |
+            NULL, |
+            NULL, |
+            NULL, |
+            NULL |
+    }; |
+    UConverterFromUnicodeArgs fromUArgs = { |
+        sizeof(UConverterFromUnicodeArgs), |
+            TRUE, |
+            NULL, |
+            NULL, |
+            NULL, |
+            NULL, |
+            NULL, |
+            NULL |
+    }; |
+ |
+    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); |
+ |
+    if (status == NULL || U_FAILURE(*status)){ |
+        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); |
+        return 0; |
+    } |
+ |
+    if (!pBufferSize || !cnv){ |
+        *status = U_ILLEGAL_ARGUMENT_ERROR; |
+        UTRACE_EXIT_STATUS(*status); |
+        return 0; |
+    } |
+ |
+    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", |
+                                    ucnv_getName(cnv, status), cnv, stackBuffer); |
+ |
+    if (cnv->sharedData->impl->safeClone != NULL) { |
+        /* call the custom safeClone function for sizing */ |
+        bufferSizeNeeded = 0; |
+        cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); |
+    } |
+    else |
+    { |
+        /* inherent sizing */ |
+        bufferSizeNeeded = sizeof(UConverter); |
+    } |
+ |
+    if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ |
+        *pBufferSize = bufferSizeNeeded; |
+        UTRACE_EXIT_VALUE(bufferSizeNeeded); |
+        return 0; |
+    } |
+ |
+    /* Pointers on 64-bit platforms need to be aligned |
+     * on a 64-bit boundary in memory. |
+     */ |
+    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { |
+        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); |
+        if(*pBufferSize > offsetUp) { |
+            *pBufferSize -= offsetUp; |
+            stackBufferChars += offsetUp; |
+        } else { |
+            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ |
+            *pBufferSize = 1; |
+        } |
+    } |
+ |
+    stackBuffer = (void *)stackBufferChars; |
+    /* Now, see if we must allocate any memory */ |
+    if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL) |
+    { |
+        /* allocate one here...*/ |
+        localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); |
+ |
+        if(localConverter == NULL) { |
+            *status = U_MEMORY_ALLOCATION_ERROR; |
+            UTRACE_EXIT_STATUS(*status); |
+            return NULL; |
+        } |
+ |
+        if (U_SUCCESS(*status)) { |
+            *status = U_SAFECLONE_ALLOCATED_WARNING; |
+        } |
+        /* record the fact that memory was allocated */ |
+        *pBufferSize = bufferSizeNeeded; |
+    } else { |
+        /* just use the stack buffer */ |
+        localConverter = (UConverter*) stackBuffer; |
+        allocatedConverter = NULL; |
+    } |
+ |
+    uprv_memset(localConverter, 0, bufferSizeNeeded); |
+    /* Copy initial state */ |
+    uprv_memcpy(localConverter, cnv, sizeof(UConverter)); |
+    localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; |
+ |
+    /* copy the substitution string */ |
+    if (cnv->subChars == (uint8_t *)cnv->subUChars) { |
+        localConverter->subChars = (uint8_t *)localConverter->subUChars; |
+    } else { |
+        localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
+        if (localConverter->subChars == NULL) { |
+            *status = U_MEMORY_ALLOCATION_ERROR; /* do not return NULL with a success code */ |
+            uprv_free(allocatedConverter); |
+            UTRACE_EXIT_STATUS(*status); |
+            return NULL; |
+        } |
+        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
+    } |
+ |
+    /* now either call the safeclone fcn or not */ |
+    cloneMemory = localConverter; /* keep the memory reachable in case the custom clone returns NULL */ |
+    if (cnv->sharedData->impl->safeClone != NULL) { |
+        /* call the custom safeClone function */ |
+        localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); |
+    } |
+ |
+    if(localConverter==NULL || U_FAILURE(*status)) { |
+        if (cloneMemory->subChars != (uint8_t *)cloneMemory->subUChars) { |
+            uprv_free(cloneMemory->subChars); /* also for a clone in the stack buffer */ |
+        } |
+        uprv_free(allocatedConverter); |
+        UTRACE_EXIT_STATUS(*status); |
+        return NULL; |
+    } |
+ |
+    /* increment refcount of shared data if needed */ |
+    /* |
+    Checking whether it's an algorithmic converter is okay |
+    in multithreaded applications because the value never changes. |
+    Don't check referenceCounter for any other value. |
+    */ |
+    if (cnv->sharedData->referenceCounter != ~0) { |
+        ucnv_incrementRefCount(cnv->sharedData); |
+    } |
+ |
+    if(localConverter == (UConverter*)stackBuffer) { |
+        /* we're using user provided data - set to not destroy */ |
+        localConverter->isCopyLocal = TRUE; |
+    } |
+ |
+    /* allow callback functions to handle any memory allocation */ |
+    toUArgs.converter = fromUArgs.converter = localConverter; |
+    cbErr = U_ZERO_ERROR; |
+    cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); |
+    cbErr = U_ZERO_ERROR; |
+    cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); |
+ |
+    UTRACE_EXIT_PTR_STATUS(localConverter, *status); |
+    return localConverter; |
+} |
+ |
+ |
+ |
+/* |
+ * Closes a converter: notifies non-default callbacks with UCNV_CLOSE, runs the |
+ * implementation's close hook, frees a separately allocated substitution buffer, |
+ * calls ucnv_unloadSharedDataIfReady() unless referenceCounter is ~0, and finally |
+ * frees the converter itself unless it is marked isCopyLocal. NULL is ignored. |
+ */ |
+ |
+U_CAPI void U_EXPORT2 |
+ucnv_close(UConverter *converter) |
+{ |
+    UErrorCode status = U_ZERO_ERROR; |
+ |
+    UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); |
+ |
+    if (converter == NULL) { |
+        UTRACE_EXIT(); |
+        return; |
+    } |
+ |
+    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", |
+        ucnv_getName(converter, &status), converter, converter->isCopyLocal); |
+ |
+    /* |
+     * Step 1: notify the callback functions that the converter is closed. |
+     * To speed up the close, a callback is only called when it has been changed |
+     * from the default. This check only works when the callbacks are set within |
+     * a shared library or from user code that statically links this code. |
+     */ |
+    if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { |
+        UConverterToUnicodeArgs toArgs = { |
+            sizeof(UConverterToUnicodeArgs), |
+            TRUE, |
+            NULL, NULL, NULL, NULL, NULL, NULL |
+        }; |
+ |
+        toArgs.converter = converter; |
+        status = U_ZERO_ERROR; |
+        converter->fromCharErrorBehaviour(converter->toUContext, &toArgs, NULL, 0, UCNV_CLOSE, &status); |
+    } |
+    if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { |
+        UConverterFromUnicodeArgs fromArgs = { |
+            sizeof(UConverterFromUnicodeArgs), |
+            TRUE, |
+            NULL, NULL, NULL, NULL, NULL, NULL |
+        }; |
+ |
+        fromArgs.converter = converter; |
+        status = U_ZERO_ERROR; |
+        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromArgs, NULL, 0, 0, UCNV_CLOSE, &status); |
+    } |
+ |
+    /* Step 2: implementation-specific cleanup, if the implementation provides a close hook. */ |
+    if (converter->sharedData->impl->close != NULL) { |
+        converter->sharedData->impl->close(converter); |
+    } |
+ |
+    /* Step 3: the substitution buffer is freed only when it is not the embedded subUChars. */ |
+    if (converter->subChars != (uint8_t *)converter->subUChars) { |
+        uprv_free(converter->subChars); |
+    } |
+ |
+    /* |
+     * Step 4: hand the shared data back. |
+     * Checking whether it's an algorithmic converter is okay in multithreaded |
+     * applications because the value never changes. |
+     * Don't check referenceCounter for any other value. |
+     */ |
+    if (converter->sharedData->referenceCounter != ~0) { |
+        ucnv_unloadSharedDataIfReady(converter->sharedData); |
+    } |
+ |
+    /* Step 5: a converter marked isCopyLocal is not freed here. */ |
+    if (!converter->isCopyLocal) { |
+        uprv_free(converter); |
+    } |
+ |
+    UTRACE_EXIT(); |
+} |
+ |
+/* Returns the n-th available converter name, or NULL when n is outside |
+ * 0..0xffff or ucnv_bld_getAvailableConverter() reports a failure. */ |
+U_CAPI const char* U_EXPORT2 |
+ucnv_getAvailableName(int32_t n) |
+{ |
+    UErrorCode status = U_ZERO_ERROR; |
+    const char *converterName; |
+ |
+    if (n < 0 || n > 0xffff) { |
+        return NULL; /* would not survive the cast to uint16_t below */ |
+    } |
+    converterName = ucnv_bld_getAvailableConverter((uint16_t)n, &status); |
+    return U_SUCCESS(status) ? converterName : NULL; |
+} |
+ |
+/* Returns what ucnv_bld_countAvailableConverters() reports; its error code is not passed on. */ |
+U_CAPI int32_t U_EXPORT2 ucnv_countAvailable(void) /* (void): a full prototype, not an old-style () */ |
+{ |
+    UErrorCode err = U_ZERO_ERROR; /* local status, discarded on return */ |
+    return ucnv_bld_countAvailableConverters(&err); |
+} |
+ |
+/* |
+ * Copies the converter's substitution bytes into mySubChar. On entry *len is the |
+ * capacity of mySubChar; on success it receives the number of bytes copied. |
+ * A substitution with subCharLen<=0 (Unicode string or empty string) yields |
+ * *len==0; a too-small buffer sets U_INDEX_OUTOFBOUNDS_ERROR. |
+ */ |
+U_CAPI void U_EXPORT2 |
+ucnv_getSubstChars(const UConverter *converter, char *mySubChar, |
+                   int8_t *len, UErrorCode *err) |
+{ |
+    if (U_FAILURE(*err)) { |
+        return; |
+    } |
+ |
+    /* exactly one of the three outcomes below applies */ |
+    if (converter->subCharLen <= 0) { |
+        *len = 0; /* Unicode string or empty string from ucnv_setSubstString(). */ |
+    } else if (*len < converter->subCharLen) { |
+        *err = U_INDEX_OUTOFBOUNDS_ERROR; /* not enough space in mySubChar */ |
+    } else { |
+        uprv_memcpy(mySubChar, converter->subChars, converter->subCharLen); |
+        *len = converter->subCharLen; /* store # of bytes copied to buffer */ |
+    } |
+} |
+ |
+/* |
+ * Replaces the converter's substitution bytes with len bytes from mySubChar. |
+ * len must lie between the converter's minBytesPerChar and maxBytesPerChar |
+ * (inclusive); otherwise U_ILLEGAL_ARGUMENT_ERROR is set and nothing changes. |
+ * Does nothing if *err already indicates a failure. |
+ */ |
+U_CAPI void U_EXPORT2 |
+ucnv_setSubstChars(UConverter *converter, const char *mySubChar, |
+                   int8_t len, UErrorCode *err) |
+{ |
+    if (U_FAILURE(*err)) { |
+        return; |
+    } |
+ |
+    /* accept only lengths within the codepage's char length boundaries */ |
+    if (converter->sharedData->staticData->minBytesPerChar <= len && |
+        len <= converter->sharedData->staticData->maxBytesPerChar) { |
+        uprv_memcpy(converter->subChars, mySubChar, len); /* copies the subchars */ |
+        converter->subCharLen = len; /* sets the new len */ |
+ |
+        /* |
+         * There is currently (2001Feb) no separate API to set/get subChar1. |
+         * Setting it to 0 makes sure the explicitly set subChar is always written. |
+         */ |
+        converter->subChar1 = 0; |
+    } else { |
+        *err = U_ILLEGAL_ARGUMENT_ERROR; |
+    } |
+} |
+ |
+/* |
+ * Sets the substitution string from the Unicode string s (length UChars, or NUL-terminated |
+ * if length<0). s is first converted with a clone of cnv and the STOP callback, so that bad |
+ * input fails before cnv changes. Then either the converted bytes are stored (subCharLen>0) |
+ * or the UChars themselves (subCharLen<0, the negated UChar count). |
+ */ |
+U_CAPI void U_EXPORT2 |
+ucnv_setSubstString(UConverter *cnv, const UChar *s, int32_t length, UErrorCode *err) { |
+    UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; |
+    char chars[UCNV_ERROR_BUFFER_LENGTH]; |
+ |
+    UConverter *clone; |
+    uint8_t *subChars; |
+    int32_t cloneSize, length8; |
+ |
+    /* Let the following functions check all arguments. */ |
+    cloneSize = sizeof(cloneBuffer); |
+    clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); |
+    ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); |
+    length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); |
+    ucnv_close(clone); |
+    if (U_FAILURE(*err)) { |
+        return; |
+    } |
+ |
+    if (cnv->sharedData->impl->writeSub == NULL |
+#if !UCONFIG_NO_LEGACY_CONVERSION |
+        || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && |
+         ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) |
+#endif |
+    ) { |
+        /* The converter is not stateful. Store the charset bytes as a fixed string. */ |
+        subChars = (uint8_t *)chars; |
+    } else { |
+        /* |
+         * The converter has a non-default writeSub() function, indicating |
+         * that it is stateful. |
+         * Store the Unicode string for on-the-fly conversion for correct |
+         * state handling. |
+         */ |
+        if (length < 0) { |
+            length = u_strlen(s); /* resolve NUL-terminated input first so the guard below sees the real length */ |
+        } |
+        if (length > UCNV_ERROR_BUFFER_LENGTH) { |
+            /* |
+             * Should not occur. The converter should output at least one byte |
+             * per UChar, which means that ucnv_fromUChars() should catch all |
+             * overflows. This guard keeps the memcpy() below within the buffer anyway. |
+             */ |
+            *err = U_BUFFER_OVERFLOW_ERROR; |
+            return; |
+        } |
+        subChars = (uint8_t *)s; |
+        length8 = length * U_SIZEOF_UCHAR; |
+    } |
+ |
+    /* For storing the substitution string, select either the small buffer inside |
+     * UConverter or allocate a subChars buffer. */ |
+    if (length8 > UCNV_MAX_SUBCHAR_LEN) { |
+        /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ |
+        if (cnv->subChars == (uint8_t *)cnv->subUChars) { |
+            /* Allocate a new buffer for the string. */ |
+            cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
+            if (cnv->subChars == NULL) { |
+                cnv->subChars = (uint8_t *)cnv->subUChars; /* fall back to the embedded buffer */ |
+                *err = U_MEMORY_ALLOCATION_ERROR; |
+                return; |
+            } |
+            uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
+        } |
+    } |
+ |
+    /* Copy the substitution string into the UConverter or its subChars buffer. */ |
+    if (length8 == 0) { |
+        cnv->subCharLen = 0; |
+    } else { |
+        uprv_memcpy(cnv->subChars, subChars, length8); |
+        if (subChars == (uint8_t *)chars) { |
+            cnv->subCharLen = (int8_t)length8; |
+        } else /* subChars == s */ { |
+            cnv->subCharLen = (int8_t)-length; |
+        } |
+    } |
+ |
+    /* See comment in ucnv_setSubstChars(). */ |
+    cnv->subChar1 = 0; |
+} |
+ |
+/* |
+ * Puts a converter back into the state of a freshly created one. |
+ * choice selects the direction(s): values <=UCNV_RESET_TO_UNICODE reset the |
+ * to-Unicode state, values !=UCNV_RESET_TO_UNICODE reset the from-Unicode state. |
+ * With callCallback set, non-default callbacks are first notified with UCNV_RESET. |
+ * A NULL converter is ignored. |
+ */ |
+static void _reset(UConverter *converter, UConverterResetChoice choice, |
+                   UBool callCallback) { |
+    UBool resetToU, resetFromU; |
+ |
+    if (converter == NULL) { |
+        return; |
+    } |
+ |
+    resetToU = (UBool)(choice <= UCNV_RESET_TO_UNICODE); |
+    resetFromU = (UBool)(choice != UCNV_RESET_TO_UNICODE); |
+ |
+    /* first, notify the callback functions that the converter is reset */ |
+    if (callCallback && resetToU && |
+        converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { |
+        UConverterToUnicodeArgs toArgs = { |
+            sizeof(UConverterToUnicodeArgs), |
+            TRUE, |
+            NULL, NULL, NULL, NULL, NULL, NULL |
+        }; |
+        UErrorCode callbackStatus = U_ZERO_ERROR; |
+ |
+        toArgs.converter = converter; |
+        converter->fromCharErrorBehaviour(converter->toUContext, &toArgs, NULL, 0, UCNV_RESET, &callbackStatus); |
+    } |
+    if (callCallback && resetFromU && |
+        converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { |
+        UConverterFromUnicodeArgs fromArgs = { |
+            sizeof(UConverterFromUnicodeArgs), |
+            TRUE, |
+            NULL, NULL, NULL, NULL, NULL, NULL |
+        }; |
+        UErrorCode callbackStatus = U_ZERO_ERROR; |
+ |
+        fromArgs.converter = converter; |
+        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromArgs, NULL, 0, 0, UCNV_RESET, &callbackStatus); |
+    } |
+ |
+    /* now reset the converter itself */ |
+    if (resetToU) { |
+        converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; |
+        converter->mode = 0; |
+        converter->toULength = 0; |
+        converter->invalidCharLength = converter->UCharErrorBufferLength = 0; |
+        converter->preToULength = 0; |
+    } |
+    if (resetFromU) { |
+        converter->fromUnicodeStatus = 0; |
+        converter->fromUChar32 = 0; |
+        converter->invalidUCharLength = converter->charErrorBufferLength = 0; |
+        converter->preFromUFirstCP = U_SENTINEL; |
+        converter->preFromULength = 0; |
+    } |
+ |
+    /* last, let the implementation reset whatever state it keeps itself */ |
+    if (converter->sharedData->impl->reset != NULL) { |
+        /* call the custom reset function */ |
+        converter->sharedData->impl->reset(converter, choice); |
+    } |
+} |
+ |
+/* Resets both directions: calls _reset() with UCNV_RESET_BOTH and callCallback=TRUE. */ |
+U_CAPI void U_EXPORT2 |
+ucnv_reset(UConverter *converter) { |
+    _reset(converter, UCNV_RESET_BOTH, TRUE); |
+} |
+ |
+/* Resets the to-Unicode side: calls _reset() with UCNV_RESET_TO_UNICODE and callCallback=TRUE. */ |
+U_CAPI void U_EXPORT2 |
+ucnv_resetToUnicode(UConverter *converter) { |
+    _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); |
+} |
+ |
+/* Resets the from-Unicode side: calls _reset() with UCNV_RESET_FROM_UNICODE and callCallback=TRUE. */ |
+U_CAPI void U_EXPORT2 |
+ucnv_resetFromUnicode(UConverter *converter) { |
+    _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); |
+} |
+ |
+/* Returns the maxBytesPerUChar value stored in the converter object itself. */ |
+U_CAPI int8_t U_EXPORT2 |
+ucnv_getMaxCharSize(const UConverter *converter) { |
+    return converter->maxBytesPerUChar; |
+} |
+ |
+ |
+/* Returns the minBytesPerChar value from the converter's shared static data. */ |
+U_CAPI int8_t U_EXPORT2 |
+ucnv_getMinCharSize(const UConverter *converter) { |
+    return converter->sharedData->staticData->minBytesPerChar; |
+} |
+ |
+/* Returns the converter's name: the implementation's getName() result when that hook |
+ * exists and returns non-NULL, otherwise the name in the static data. NULL if *err failed. */ |
+U_CAPI const char* U_EXPORT2 |
+ucnv_getName(const UConverter *converter, UErrorCode *err) |
+{ |
+    const char *implName = NULL; |
+ |
+    if (U_FAILURE(*err)) { return NULL; } |
+    if (converter->sharedData->impl->getName) { |
+        implName = converter->sharedData->impl->getName(converter); |
+    } |
+    return implName ? implName : converter->sharedData->staticData->name; |
+} |
+ |
+/* |
+ * Returns the converter's codepage number from its static data, or -1 if *err |
+ * already indicates a failure. When the stored codepage is 0, the "IBM" standard |
+ * name of the converter is looked up and the number after its first '-' is parsed. |
+ */ |
+U_CAPI int32_t U_EXPORT2 |
+ucnv_getCCSID(const UConverter *converter, UErrorCode *err) |
+{ |
+    const char *ibmName, *dash; |
+    int32_t ccsid; |
+ |
+    if (U_FAILURE(*err)) { return -1; } |
+ |
+    ccsid = converter->sharedData->staticData->codepage; |
+    if (ccsid != 0) { return ccsid; } |
+ |
+    /* Rare case, e.g. gb18030: no IBM canonical name, but there is an IBM alias. */ |
+    ibmName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); |
+    if (U_FAILURE(*err) || !ibmName) { return ccsid; } |
+    dash = uprv_strchr(ibmName, '-'); |
+    return dash ? (int32_t)atol(dash + 1) : ccsid; /* +1 to skip '-' */ |
+} |
+ |
+ |
+/* Returns the platform stored in the converter's static data, |
+ * or UCNV_UNKNOWN if *err already indicates a failure. */ |
+U_CAPI UConverterPlatform U_EXPORT2 |
+ucnv_getPlatform(const UConverter *converter, UErrorCode *err) |
+{ |
+    return U_FAILURE(*err) |
+        ? UCNV_UNKNOWN |
+        : (UConverterPlatform)converter->sharedData->staticData->platform; |
+} |
+ |
+/* Reports the converter's current to-Unicode callback (*action) and its context (*context). */ |
+U_CAPI void U_EXPORT2 |
+ucnv_getToUCallBack(const UConverter *converter, |
+                    UConverterToUCallback *action, const void **context) |
+{ |
+    *action = converter->fromCharErrorBehaviour; /* the to-Unicode callback lives in this field */ |
+    *context = converter->toUContext; |
+} |
+ |
+/* Reports the converter's current from-Unicode callback (*action) and its context (*context). */ |
+U_CAPI void U_EXPORT2 |
+ucnv_getFromUCallBack(const UConverter *converter, |
+                      UConverterFromUCallback *action, const void **context) |
+{ |
+    *action = converter->fromUCharErrorBehaviour; /* the from-Unicode callback lives in this field */ |
+    *context = converter->fromUContext; |
+} |
+ |
+/* Installs newAction/newContext as the to-Unicode callback; each non-NULL out-pointer |
+ * receives the previous value. Does nothing if *err already indicates a failure. */ |
+U_CAPI void U_EXPORT2 |
+ucnv_setToUCallBack(UConverter *converter, |
+                    UConverterToUCallback newAction, const void *newContext, |
+                    UConverterToUCallback *oldAction, const void **oldContext, |
+                    UErrorCode *err) |
+{ |
+    if (U_SUCCESS(*err)) { |
+        if (oldAction != NULL) { *oldAction = converter->fromCharErrorBehaviour; } |
+        converter->fromCharErrorBehaviour = newAction; |
+        if (oldContext != NULL) { *oldContext = converter->toUContext; } |
+        converter->toUContext = newContext; |
+    } |
+} |
+ |
+/* Installs newAction/newContext as the from-Unicode callback; each non-NULL out-pointer |
+ * receives the previous value. Does nothing if *err already indicates a failure. */ |
+U_CAPI void U_EXPORT2 |
+ucnv_setFromUCallBack(UConverter *converter, |
+                      UConverterFromUCallback newAction, const void *newContext, |
+                      UConverterFromUCallback *oldAction, const void **oldContext, |
+                      UErrorCode *err) |
+{ |
+    if (U_SUCCESS(*err)) { |
+        if (oldAction != NULL) { *oldAction = converter->fromUCharErrorBehaviour; } |
+        converter->fromUCharErrorBehaviour = newAction; |
+        if (oldContext != NULL) { *oldContext = converter->fromUContext; } |
+        converter->fromUContext = newContext; |
+    } |
+} |
+ |
+/* |
+ * Adjusts the offsets that one conversion step wrote into offsets[0..length). |
+ * With sourceIndex>=0 every non-negative offset is shifted by |
+ * sourceIndex-errorInputLength, i.e. by the previous sourceIndex minus the |
+ * length of the input sequence that caused an error, if any. |
+ * With sourceIndex<0 (this conversion function does not handle offsets), or |
+ * when that difference is negative (the error input sequence started in a |
+ * previous buffer), every offset is set to -1. |
+ * @param offsets          offsets to fix up in place |
+ * @param length           number of entries in offsets |
+ * @param sourceIndex      index to add, or a negative value for "no offsets" |
+ * @param errorInputLength length of the erroneous input sequence, else 0 |
+ */ |
+static void |
+_updateOffsets(int32_t *offsets, int32_t length, |
+               int32_t sourceIndex, int32_t errorInputLength) { |
+    int32_t shift, i; |
+ |
+    if (sourceIndex < 0) { |
+        shift = -1; |
+    } else { |
+        shift = sourceIndex - errorInputLength; |
+    } |
+ |
+    if (shift == 0) { |
+        /* most common case, nothing to do */ |
+        return; |
+    } |
+ |
+    if (shift < 0) { |
+        /* no usable source index: invalidate every offset */ |
+        for (i = 0; i < length; ++i) { |
+            offsets[i] = -1; |
+        } |
+        return; |
+    } |
+ |
+    /* shift > 0: add it to each offset (but not if the offset is <0) */ |
+    for (i = 0; i < length; ++i) { |
+        if (offsets[i] >= 0) { |
+            offsets[i] += shift; |
+        } |
+    } |
+} |
+ |
+/* ucnv_fromUnicode --------------------------------------------------------- */ |
+ |
+/* |
+ * Implementation note for m:n conversions |
+ * |
+ * While collecting source units to find the longest match for m:n conversion, |
+ * some source units may need to be stored for a partial match. |
+ * When a second buffer does not yield a match on all of the previously stored |
+ * source units, then they must be "replayed", i.e., fed back into the converter. |
+ * |
+ * The code relies on the fact that replaying will not nest - |
+ * converting a replay buffer will not result in a replay. |
+ * This is because a replay is necessary only after the _continuation_ of a |
+ * partial match failed, but a replay buffer is converted as a whole. |
+ * It may result in some of its units being stored again for a partial match, |
+ * but there will not be a continuation _during_ the replay which could fail. |
+ * |
+ * It is conceivable that a callback function could call the converter |
+ * recursively in a way that causes another replay to be stored, but that |
+ * would be an error in the callback function. |
+ * Such violations will cause assertion failures in a debug build, |
+ * and wrong output, but they will not cause a crash. |
+ */ |
+ |
+static void /* drives one fromUnicode call: runs the implementation function, then handles offsets, m:n replay and error callbacks */ |
+_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) { |
+ UConverterFromUnicode fromUnicode; |
+ UConverter *cnv; |
+ const UChar *s; |
+ char *t; |
+ int32_t *offsets; |
+ int32_t sourceIndex; /* index added to written offsets; -1 makes _updateOffsets() write -1 for all of them */ |
+ int32_t errorInputLength; /* number of UChars passed to the last callback, 0 right after a conversion step */ |
+ UBool converterSawEndOfInput, calledCallback; |
+ |
+ /* variables for m:n conversion */ |
+ UChar replay[UCNV_EXT_MAX_UCHARS]; |
+ const UChar *realSource, *realSourceLimit; /* realSource!=NULL means "currently replaying" */ |
+ int32_t realSourceIndex; |
+ UBool realFlush; |
+ |
+ cnv=pArgs->converter; |
+ s=pArgs->source; |
+ t=pArgs->target; |
+ offsets=pArgs->offsets; |
+ |
+ /* get the converter implementation function */ |
+ sourceIndex=0; |
+ if(offsets==NULL) { |
+ fromUnicode=cnv->sharedData->impl->fromUnicode; |
+ } else { |
+ fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; |
+ if(fromUnicode==NULL) { |
+ /* there is no WithOffsets implementation */ |
+ fromUnicode=cnv->sharedData->impl->fromUnicode; |
+ /* we will write -1 for each offset */ |
+ sourceIndex=-1; |
+ } |
+ } |
+ |
+ if(cnv->preFromULength>=0) { |
+ /* normal mode */ |
+ realSource=NULL; |
+ |
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ |
+ realSourceLimit=NULL; |
+ realFlush=FALSE; |
+ realSourceIndex=0; |
+ } else { |
+ /* |
+ * Previous m:n conversion stored source units from a partial match |
+ * and failed to consume all of them. |
+ * We need to "replay" them from a temporary buffer and convert them first. |
+ */ |
+ realSource=pArgs->source; |
+ realSourceLimit=pArgs->sourceLimit; |
+ realFlush=pArgs->flush; |
+ realSourceIndex=sourceIndex; |
+ |
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); /* preFromULength<0 here: its negation is the stored UChar count */ |
+ pArgs->source=replay; |
+ pArgs->sourceLimit=replay-cnv->preFromULength; /* i.e. replay plus the stored UChar count */ |
+ pArgs->flush=FALSE; |
+ sourceIndex=-1; /* offsets written while replaying become -1 */ |
+ |
+ cnv->preFromULength=0; |
+ } |
+ |
+ /* |
+ * loop for conversion and error handling |
+ * |
+ * loop { |
+ * convert |
+ * loop { |
+ * update offsets |
+ * handle end of input |
+ * handle errors/call callback |
+ * } |
+ * } |
+ */ |
+ for(;;) { |
+ if(U_SUCCESS(*err)) { |
+ /* convert */ |
+ fromUnicode(pArgs, err); |
+ |
+ /* |
+ * set a flag for whether the converter |
+ * successfully processed the end of the input |
+ * |
+ * need not check cnv->preFromULength==0 because a replay (<0) will cause |
+ * s<sourceLimit before converterSawEndOfInput is checked |
+ */ |
+ converterSawEndOfInput= |
+ (UBool)(U_SUCCESS(*err) && |
+ pArgs->flush && pArgs->source==pArgs->sourceLimit && |
+ cnv->fromUChar32==0); |
+ } else { |
+ /* handle error from ucnv_convertEx() */ |
+ converterSawEndOfInput=FALSE; |
+ } |
+ |
+ /* no callback called yet for this iteration */ |
+ calledCallback=FALSE; |
+ |
+ /* no sourceIndex adjustment for conversion, only for callback output */ |
+ errorInputLength=0; |
+ |
+ /* |
+ * loop for offsets and error handling |
+ * |
+ * iterates at most 3 times: |
+ * 1. to clean up after the conversion function |
+ * 2. after the callback |
+ * 3. after the callback again if there was truncated input |
+ */ |
+ for(;;) { |
+ /* update offsets if we write any */ |
+ if(offsets!=NULL) { |
+ int32_t length=(int32_t)(pArgs->target-t); |
+ if(length>0) { |
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength); |
+ |
+ /* |
+ * if a converter handles offsets and updates the offsets |
+ * pointer at the end, then pArgs->offsets should not change |
+ * here; |
+ * however, some converters do not handle offsets at all |
+ * (sourceIndex<0) or may not update the offsets pointer |
+ */ |
+ pArgs->offsets=offsets+=length; |
+ } |
+ |
+ if(sourceIndex>=0) { |
+ sourceIndex+=(int32_t)(pArgs->source-s); |
+ } |
+ } |
+ |
+ if(cnv->preFromULength<0) { |
+ /* |
+ * switch the source to new replay units (cannot occur while replaying) |
+ * after offset handling and before end-of-input and callback handling |
+ */ |
+ if(realSource==NULL) { |
+ realSource=pArgs->source; |
+ realSourceLimit=pArgs->sourceLimit; |
+ realFlush=pArgs->flush; |
+ realSourceIndex=sourceIndex; |
+ |
+ uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); |
+ pArgs->source=replay; |
+ pArgs->sourceLimit=replay-cnv->preFromULength; |
+ pArgs->flush=FALSE; |
+ if((sourceIndex+=cnv->preFromULength)<0) { /* preFromULength is negative: step the index back by the replayed count */ |
+ sourceIndex=-1; |
+ } |
+ |
+ cnv->preFromULength=0; |
+ } else { |
+ /* see implementation note before _fromUnicodeWithCallback() */ |
+ U_ASSERT(realSource==NULL); /* realSource!=NULL in this branch, so this condition is false here: nested replay */ |
+ *err=U_INTERNAL_PROGRAM_ERROR; |
+ } |
+ } |
+ |
+ /* update pointers */ |
+ s=pArgs->source; |
+ t=pArgs->target; |
+ |
+ if(U_SUCCESS(*err)) { |
+ if(s<pArgs->sourceLimit) { |
+ /* |
+ * continue with the conversion loop while there is still input left |
+ * (continue converting by breaking out of only the inner loop) |
+ */ |
+ break; |
+ } else if(realSource!=NULL) { |
+ /* switch back from replaying to the real source and continue */ |
+ pArgs->source=realSource; |
+ pArgs->sourceLimit=realSourceLimit; |
+ pArgs->flush=realFlush; |
+ sourceIndex=realSourceIndex; |
+ |
+ realSource=NULL; |
+ break; |
+ } else if(pArgs->flush && cnv->fromUChar32!=0) { |
+ /* |
+ * the entire input stream is consumed |
+ * and there is a partial, truncated input sequence left |
+ */ |
+ |
+ /* inject an error and continue with callback handling */ |
+ *err=U_TRUNCATED_CHAR_FOUND; |
+ calledCallback=FALSE; /* new error condition */ |
+ } else { |
+ /* input consumed */ |
+ if(pArgs->flush) { |
+ /* |
+ * return to the conversion loop once more if the flush |
+ * flag is set and the conversion function has not |
+ * successfully processed the end of the input yet |
+ * |
+ * (continue converting by breaking out of only the inner loop) |
+ */ |
+ if(!converterSawEndOfInput) { |
+ break; |
+ } |
+ |
+ /* reset the converter without calling the callback function */ |
+ _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); |
+ } |
+ |
+ /* done successfully */ |
+ return; |
+ } |
+ } |
+ |
+ /* U_FAILURE(*err) */ |
+ { |
+ UErrorCode e; |
+ |
+ if( calledCallback || |
+ (e=*err)==U_BUFFER_OVERFLOW_ERROR || |
+ (e!=U_INVALID_CHAR_FOUND && |
+ e!=U_ILLEGAL_CHAR_FOUND && |
+ e!=U_TRUNCATED_CHAR_FOUND) |
+ ) { |
+ /* |
+ * the callback did not or cannot resolve the error: |
+ * set output pointers and return |
+ * |
+ * the check for buffer overflow is redundant but it is |
+ * a high-runner case and hopefully documents the intent |
+ * well |
+ * |
+ * if we were replaying, then the replay buffer must be |
+ * copied back into the UConverter |
+ * and the real arguments must be restored |
+ */ |
+ if(realSource!=NULL) { |
+ int32_t length; |
+ |
+ U_ASSERT(cnv->preFromULength==0); |
+ |
+ length=(int32_t)(pArgs->sourceLimit-pArgs->source); |
+ if(length>0) { |
+ uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR); |
+ cnv->preFromULength=(int8_t)-length; /* negative length marks "replay pending" for the next call */ |
+ } |
+ |
+ pArgs->source=realSource; |
+ pArgs->sourceLimit=realSourceLimit; |
+ pArgs->flush=realFlush; |
+ } |
+ |
+ return; |
+ } |
+ } |
+ |
+ /* callback handling */ |
+ { |
+ UChar32 codePoint; |
+ |
+ /* get and write the code point */ |
+ codePoint=cnv->fromUChar32; |
+ errorInputLength=0; |
+ U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); |
+ cnv->invalidUCharLength=(int8_t)errorInputLength; |
+ |
+ /* set the converter state to deal with the next character */ |
+ cnv->fromUChar32=0; |
+ |
+ /* call the callback function */ |
+ cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, |
+ cnv->invalidUCharBuffer, errorInputLength, codePoint, |
+ *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, /* illegal and truncated input both use UCNV_ILLEGAL */ |
+ err); |
+ } |
+ |
+ /* |
+ * loop back to the offset handling |
+ * |
+ * this flag will indicate after offset handling |
+ * that a callback was called; |
+ * if the callback did not resolve the error, then we return |
+ */ |
+ calledCallback=TRUE; |
+ } |
+ } |
+} |
+ |
+/* |
+ * Drain pending fromUnicode output (cnv->charErrorBuffer) into the target. |
+ * Only call this when cnv->charErrorBufferLength>0. |
+ * |
+ * Bytes are copied until either the overflow buffer is exhausted or the |
+ * target is full. Whatever did not fit is shifted to the front of the |
+ * overflow buffer so that a later call can continue with it. |
+ * When an offsets array is in use, each byte written gets the offset -1. |
+ * |
+ * *target is advanced past the bytes written; *pOffsets is advanced as well |
+ * when pOffsets!=NULL and *pOffsets!=NULL. |
+ * |
+ * @return TRUE if bytes remain buffered (then *err==U_BUFFER_OVERFLOW_ERROR), |
+ *         FALSE if the overflow buffer was emptied completely |
+ */ |
+static UBool |
+ucnv_outputOverflowFromUnicode(UConverter *cnv, |
+                               char **target, const char *targetLimit, |
+                               int32_t **pOffsets, |
+                               UErrorCode *err) { |
+    char *pending=(char *)cnv->charErrorBuffer; |
+    int32_t pendingLength=cnv->charErrorBufferLength; |
+    char *out=*target; |
+    int32_t *offs=(pOffsets!=NULL) ? *pOffsets : NULL; |
+    int32_t taken, kept; |
+ |
+    /* phase 1: move as many buffered bytes as the target can hold */ |
+    for(taken=0; taken<pendingLength && out!=targetLimit; ++taken) { |
+        *out++=pending[taken]; |
+        if(offs!=NULL) { |
+            *offs++=-1; /* no source index available for old output */ |
+        } |
+    } |
+ |
+    /* phase 2: slide whatever is left down to the start of the overflow buffer */ |
+    for(kept=0; taken<pendingLength; ++kept, ++taken) { |
+        pending[kept]=pending[taken]; |
+    } |
+ |
+    /* publish the new state to the converter and the caller */ |
+    cnv->charErrorBufferLength=(int8_t)kept; |
+    *target=out; |
+    if(offs!=NULL) { |
+        *pOffsets=offs; |
+    } |
+ |
+    if(kept>0) { |
+        /* the target filled up before the overflow buffer was empty */ |
+        *err=U_BUFFER_OVERFLOW_ERROR; |
+        return TRUE; |
+    } |
+    return FALSE; |
+} |
+ |
+/* |
+ * Public streaming entry point: convert UChars from [*source, sourceLimit[ |
+ * into bytes at [*target, targetLimit[ using cnv. |
+ * |
+ * Validates the arguments, first writes out any bytes still pending in the |
+ * converter's overflow buffer, and then hands over to |
+ * _fromUnicodeWithCallback(). On return *source and *target point behind |
+ * the consumed input and the produced output. |
+ * |
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL cnv/target/source, for limits that |
+ * precede their start pointers, for buffers too large for int32_t |
+ * arithmetic, and for a source range with an odd number of bytes. |
+ */ |
+U_CAPI void U_EXPORT2 |
+ucnv_fromUnicode(UConverter *cnv, |
+                 char **target, const char *targetLimit, |
+                 const UChar **source, const UChar *sourceLimit, |
+                 int32_t *offsets, |
+                 UBool flush, |
+                 UErrorCode *err) { |
+    UConverterFromUnicodeArgs args; |
+    const UChar *srcStart; |
+    char *dstStart; |
+ |
+    /* do nothing if the caller is already in an error state */ |
+    if(err==NULL || U_FAILURE(*err)) { |
+        return; |
+    } |
+ |
+    /* the converter and both in/out pointers are mandatory */ |
+    if(cnv==NULL || target==NULL || source==NULL) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    srcStart=*source; |
+    dstStart=*target; |
+ |
+    /* |
+     * A limit equal to U_MAX_PTR is pulled back by one byte: this avoids an |
+     * infinite loop at that limit and lets the alignment check below pass, |
+     * since the limit is expected to sit on a UChar boundary. |
+     */ |
+    if((const void *)sourceLimit==(const void *)U_MAX_PTR(sourceLimit)) { |
+        sourceLimit=(const UChar *)((const char *)sourceLimit-1); |
+    } |
+ |
+    /* |
+     * None of the following should ever be true. |
+     * We report an error rather than clamping the limits because clamping |
+     * would break the guarantee that either the source is consumed or the |
+     * target is filled (unless an error occurs). |
+     */ |
+ |
+    /* 1) a limit must not lie before its start pointer */ |
+    if(sourceLimit<srcStart || targetLimit<dstStart) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    /* |
+     * 2) buffer sizes must stay within the int32_t number range: some code |
+     *    works with sizes (in units or bytes) instead of comparing pointers, |
+     *    and offsets are int32_t values |
+     */ |
+    if( (sourceLimit>srcStart && (size_t)(sourceLimit-srcStart)>(size_t)0x3fffffff) || |
+        (targetLimit>dstStart && (size_t)(targetLimit-dstStart)>(size_t)0x7fffffff) |
+    ) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    /* |
+     * 3) an odd byte distance means the caller passed a miscast pointer |
+     *    and an incomplete UChar code unit |
+     */ |
+    if((((const char *)sourceLimit-(const char *)srcStart)&1)!=0) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    /* emit output left over from a previous call before converting more */ |
+    if(cnv->charErrorBufferLength>0) { |
+        if(ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)) { |
+            /* still does not fit: U_BUFFER_OVERFLOW_ERROR */ |
+            return; |
+        } |
+    } |
+    /* from here on read *target, not dstStart: the target may have moved */ |
+ |
+    /* no flush, no new input, no replay pending: nothing more to do */ |
+    if(!flush && srcStart==sourceLimit && cnv->preFromULength>=0) { |
+        return; |
+    } |
+ |
+    /* |
+     * A full target (with !flush) is deliberately not reported as an |
+     * overflow here: the remaining source might produce no output at all, |
+     * for example when the skip callback is invoked. |
+     */ |
+ |
+    /* package everything for the conversion driver */ |
+    args.size=sizeof(args); |
+    args.converter=cnv; |
+    args.flush=flush; |
+    args.offsets=offsets; |
+    args.source=srcStart; |
+    args.sourceLimit=sourceLimit; |
+    args.target=*target; |
+    args.targetLimit=targetLimit; |
+ |
+    _fromUnicodeWithCallback(&args, err); |
+ |
+    /* report progress back to the caller */ |
+    *source=args.source; |
+    *target=args.target; |
+} |
+ |
+/* ucnv_toUnicode() --------------------------------------------------------- */ |
+/* |
+ * Conversion driver behind ucnv_toUnicode() and ucnv_getNextUChar(). |
+ * |
+ * Repeatedly calls the converter's toUnicode implementation function |
+ * (the WithOffsets variant when pArgs->offsets!=NULL and one exists) and, |
+ * after each call, updates the offsets, switches to and from the replay |
+ * buffer for unconsumed m:n partial-match input (cnv->preToU, flagged by |
+ * cnv->preToULength<0), detects truncated input when flushing, and invokes |
+ * the error callback cnv->fromCharErrorBehaviour. |
+ * |
+ * @param pArgs conversion arguments; source, target and offsets are advanced. |
+ * While replaying, source/sourceLimit/flush temporarily refer to a local |
+ * buffer; the caller's values are restored before this function returns. |
+ * @param err in/out error code. If it is already a failure on entry, the |
+ * conversion function is not called first and the error goes straight |
+ * to the error/callback handling. On return it is either a success code |
+ * (input consumed) or the failure that the callback did not or could |
+ * not resolve, for example U_BUFFER_OVERFLOW_ERROR. |
+ */ |
+static void |
+_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { |
+ UConverterToUnicode toUnicode; |
+ UConverter *cnv; |
+ const char *s; /* source position as of the last pointer update */ |
+ UChar *t; /* target position as of the last pointer update */ |
+ int32_t *offsets; |
+ int32_t sourceIndex; |
+ int32_t errorInputLength; |
+ UBool converterSawEndOfInput, calledCallback; |
+ |
+ /* variables for m:n conversion */ |
+ char replay[UCNV_EXT_MAX_BYTES]; /* local copy of cnv->preToU[] while replaying */ |
+ const char *realSource, *realSourceLimit; /* caller's source range; realSource!=NULL means "replaying" */ |
+ int32_t realSourceIndex; |
+ UBool realFlush; |
+ |
+ cnv=pArgs->converter; |
+ s=pArgs->source; |
+ t=pArgs->target; |
+ offsets=pArgs->offsets; |
+ |
+ /* get the converter implementation function */ |
+ sourceIndex=0; |
+ if(offsets==NULL) { |
+ toUnicode=cnv->sharedData->impl->toUnicode; |
+ } else { |
+ toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; |
+ if(toUnicode==NULL) { |
+ /* there is no WithOffsets implementation */ |
+ toUnicode=cnv->sharedData->impl->toUnicode; |
+ /* we will write -1 for each offset */ |
+ sourceIndex=-1; |
+ } |
+ } |
+ |
+ if(cnv->preToULength>=0) { |
+ /* normal mode */ |
+ realSource=NULL; |
+ |
+ /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ |
+ realSourceLimit=NULL; |
+ realFlush=FALSE; |
+ realSourceIndex=0; |
+ } else { |
+ /* |
+ * Previous m:n conversion stored source units from a partial match |
+ * and failed to consume all of them. |
+ * We need to "replay" them from a temporary buffer and convert them first. |
+ */ |
+ realSource=pArgs->source; |
+ realSourceLimit=pArgs->sourceLimit; |
+ realFlush=pArgs->flush; |
+ realSourceIndex=sourceIndex; |
+ |
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); /* preToULength is negative here: negate for the byte count */ |
+ pArgs->source=replay; |
+ pArgs->sourceLimit=replay-cnv->preToULength; /* replay + number of replayed bytes */ |
+ pArgs->flush=FALSE; /* never flush on replayed input; the real flush flag is applied after switching back */ |
+ sourceIndex=-1; |
+ |
+ cnv->preToULength=0; |
+ } |
+ |
+ /* |
+ * loop for conversion and error handling |
+ * |
+ * loop { |
+ * convert |
+ * loop { |
+ * update offsets |
+ * handle end of input |
+ * handle errors/call callback |
+ * } |
+ * } |
+ */ |
+ for(;;) { |
+ if(U_SUCCESS(*err)) { |
+ /* convert */ |
+ toUnicode(pArgs, err); |
+ |
+ /* |
+ * set a flag for whether the converter |
+ * successfully processed the end of the input |
+ * |
+ * need not check cnv->preToULength==0 because a replay (<0) will cause |
+ * s<sourceLimit before converterSawEndOfInput is checked |
+ */ |
+ converterSawEndOfInput= |
+ (UBool)(U_SUCCESS(*err) && |
+ pArgs->flush && pArgs->source==pArgs->sourceLimit && |
+ cnv->toULength==0); |
+ } else { |
+ /* handle error from getNextUChar() or ucnv_convertEx() */ |
+ converterSawEndOfInput=FALSE; |
+ } |
+ |
+ /* no callback called yet for this iteration */ |
+ calledCallback=FALSE; |
+ |
+ /* no sourceIndex adjustment for conversion, only for callback output */ |
+ errorInputLength=0; |
+ |
+ /* |
+ * loop for offsets and error handling |
+ * |
+ * iterates at most 3 times: |
+ * 1. to clean up after the conversion function |
+ * 2. after the callback |
+ * 3. after the callback again if there was truncated input |
+ */ |
+ for(;;) { |
+ /* update offsets if we write any */ |
+ if(offsets!=NULL) { |
+ int32_t length=(int32_t)(pArgs->target-t); /* UChars written since the last pointer update */ |
+ if(length>0) { |
+ _updateOffsets(offsets, length, sourceIndex, errorInputLength); |
+ |
+ /* |
+ * if a converter handles offsets and updates the offsets |
+ * pointer at the end, then pArgs->offsets should not change |
+ * here; |
+ * however, some converters do not handle offsets at all |
+ * (sourceIndex<0) or may not update the offsets pointer |
+ */ |
+ pArgs->offsets=offsets+=length; |
+ } |
+ |
+ if(sourceIndex>=0) { |
+ sourceIndex+=(int32_t)(pArgs->source-s); /* advance by the bytes consumed in this step */ |
+ } |
+ } |
+ |
+ if(cnv->preToULength<0) { |
+ /* |
+ * switch the source to new replay units (cannot occur while replaying) |
+ * after offset handling and before end-of-input and callback handling |
+ */ |
+ if(realSource==NULL) { |
+ realSource=pArgs->source; |
+ realSourceLimit=pArgs->sourceLimit; |
+ realFlush=pArgs->flush; |
+ realSourceIndex=sourceIndex; |
+ |
+ uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); |
+ pArgs->source=replay; |
+ pArgs->sourceLimit=replay-cnv->preToULength; |
+ pArgs->flush=FALSE; |
+ if((sourceIndex+=cnv->preToULength)<0) { /* step back by the replay length; pin to -1 if that goes negative */ |
+ sourceIndex=-1; |
+ } |
+ |
+ cnv->preToULength=0; |
+ } else { |
+ /* see implementation note before _fromUnicodeWithCallback() */ |
+ U_ASSERT(realSource==NULL); |
+ *err=U_INTERNAL_PROGRAM_ERROR; |
+ } |
+ } |
+ |
+ /* update pointers */ |
+ s=pArgs->source; |
+ t=pArgs->target; |
+ |
+ if(U_SUCCESS(*err)) { |
+ if(s<pArgs->sourceLimit) { |
+ /* |
+ * continue with the conversion loop while there is still input left |
+ * (continue converting by breaking out of only the inner loop) |
+ */ |
+ break; |
+ } else if(realSource!=NULL) { |
+ /* switch back from replaying to the real source and continue */ |
+ pArgs->source=realSource; |
+ pArgs->sourceLimit=realSourceLimit; |
+ pArgs->flush=realFlush; |
+ sourceIndex=realSourceIndex; |
+ |
+ realSource=NULL; |
+ break; |
+ } else if(pArgs->flush && cnv->toULength>0) { |
+ /* |
+ * the entire input stream is consumed |
+ * and there is a partial, truncated input sequence left |
+ */ |
+ |
+ /* inject an error and continue with callback handling */ |
+ *err=U_TRUNCATED_CHAR_FOUND; |
+ calledCallback=FALSE; /* new error condition */ |
+ } else { |
+ /* input consumed */ |
+ if(pArgs->flush) { |
+ /* |
+ * return to the conversion loop once more if the flush |
+ * flag is set and the conversion function has not |
+ * successfully processed the end of the input yet |
+ * |
+ * (continue converting by breaking out of only the inner loop) |
+ */ |
+ if(!converterSawEndOfInput) { |
+ break; |
+ } |
+ |
+ /* reset the converter without calling the callback function */ |
+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); |
+ } |
+ |
+ /* done successfully */ |
+ return; |
+ } |
+ } |
+ |
+ /* U_FAILURE(*err) */ |
+ { |
+ UErrorCode e; |
+ |
+ if( calledCallback || |
+ (e=*err)==U_BUFFER_OVERFLOW_ERROR || |
+ (e!=U_INVALID_CHAR_FOUND && |
+ e!=U_ILLEGAL_CHAR_FOUND && |
+ e!=U_TRUNCATED_CHAR_FOUND && |
+ e!=U_ILLEGAL_ESCAPE_SEQUENCE && |
+ e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) |
+ ) { |
+ /* |
+ * the callback did not or cannot resolve the error: |
+ * set output pointers and return |
+ * |
+ * the check for buffer overflow is redundant but it is |
+ * a high-runner case and hopefully documents the intent |
+ * well |
+ * |
+ * if we were replaying, then the replay buffer must be |
+ * copied back into the UConverter |
+ * and the real arguments must be restored |
+ */ |
+ if(realSource!=NULL) { |
+ int32_t length; |
+ |
+ U_ASSERT(cnv->preToULength==0); |
+ |
+ length=(int32_t)(pArgs->sourceLimit-pArgs->source); /* replay bytes not yet consumed */ |
+ if(length>0) { |
+ uprv_memcpy(cnv->preToU, pArgs->source, length); |
+ cnv->preToULength=(int8_t)-length; /* negative length marks "replay pending" for the next call */ |
+ } |
+ |
+ pArgs->source=realSource; |
+ pArgs->sourceLimit=realSourceLimit; |
+ pArgs->flush=realFlush; |
+ } |
+ |
+ return; |
+ } |
+ } |
+ |
+ /* copy toUBytes[] to invalidCharBuffer[] */ |
+ errorInputLength=cnv->invalidCharLength=cnv->toULength; |
+ if(errorInputLength>0) { |
+ uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); |
+ } |
+ |
+ /* set the converter state to deal with the next character */ |
+ cnv->toULength=0; |
+ |
+ /* |
+ * call the callback function |
+ * |
+ * the default reason UCNV_ILLEGAL is refined to UCNV_UNASSIGNED |
+ * when the error code is U_INVALID_CHAR_FOUND |
+ */ |
+ if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { |
+ cnv->toUCallbackReason = UCNV_UNASSIGNED; |
+ } |
+ cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, |
+ cnv->invalidCharBuffer, errorInputLength, |
+ cnv->toUCallbackReason, |
+ err); |
+ cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ |
+ |
+ /* |
+ * loop back to the offset handling |
+ * |
+ * this flag will indicate after offset handling |
+ * that a callback was called; |
+ * if the callback did not resolve the error, then we return |
+ */ |
+ calledCallback=TRUE; |
+ } |
+ } |
+} |
+ |
+/* |
+ * Drain pending toUnicode output (cnv->UCharErrorBuffer) into the target. |
+ * Only call this when cnv->UCharErrorBufferLength>0. |
+ * |
+ * UChars are copied until either the overflow buffer is exhausted or the |
+ * target is full. Whatever did not fit is shifted to the front of the |
+ * overflow buffer so that a later call can continue with it. |
+ * When an offsets array is in use, each UChar written gets the offset -1. |
+ * |
+ * *target is advanced past the UChars written; *pOffsets is advanced as well |
+ * when pOffsets!=NULL and *pOffsets!=NULL. |
+ * |
+ * @return TRUE if UChars remain buffered (then *err==U_BUFFER_OVERFLOW_ERROR), |
+ *         FALSE if the overflow buffer was emptied completely |
+ */ |
+static UBool |
+ucnv_outputOverflowToUnicode(UConverter *cnv, |
+                             UChar **target, const UChar *targetLimit, |
+                             int32_t **pOffsets, |
+                             UErrorCode *err) { |
+    UChar *pending=cnv->UCharErrorBuffer; |
+    int32_t pendingLength=cnv->UCharErrorBufferLength; |
+    UChar *out=*target; |
+    int32_t *offs=(pOffsets!=NULL) ? *pOffsets : NULL; |
+    int32_t taken, kept; |
+ |
+    /* phase 1: move as many buffered UChars as the target can hold */ |
+    for(taken=0; taken<pendingLength && out!=targetLimit; ++taken) { |
+        *out++=pending[taken]; |
+        if(offs!=NULL) { |
+            *offs++=-1; /* no source index available for old output */ |
+        } |
+    } |
+ |
+    /* phase 2: slide whatever is left down to the start of the overflow buffer */ |
+    for(kept=0; taken<pendingLength; ++kept, ++taken) { |
+        pending[kept]=pending[taken]; |
+    } |
+ |
+    /* publish the new state to the converter and the caller */ |
+    cnv->UCharErrorBufferLength=(int8_t)kept; |
+    *target=out; |
+    if(offs!=NULL) { |
+        *pOffsets=offs; |
+    } |
+ |
+    if(kept>0) { |
+        /* the target filled up before the overflow buffer was empty */ |
+        *err=U_BUFFER_OVERFLOW_ERROR; |
+        return TRUE; |
+    } |
+    return FALSE; |
+} |
+ |
+/* |
+ * Public streaming entry point: convert bytes from [*source, sourceLimit[ |
+ * into UChars at [*target, targetLimit[ using cnv. |
+ * |
+ * Validates the arguments, first writes out any UChars still pending in the |
+ * converter's overflow buffer, and then hands over to |
+ * _toUnicodeWithCallback(). On return *source and *target point behind |
+ * the consumed input and the produced output. |
+ * |
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL cnv/target/source, for limits that |
+ * precede their start pointers, for buffers too large for int32_t |
+ * arithmetic, and for a target range with an odd number of bytes. |
+ */ |
+U_CAPI void U_EXPORT2 |
+ucnv_toUnicode(UConverter *cnv, |
+               UChar **target, const UChar *targetLimit, |
+               const char **source, const char *sourceLimit, |
+               int32_t *offsets, |
+               UBool flush, |
+               UErrorCode *err) { |
+    UConverterToUnicodeArgs args; |
+    const char *srcStart; |
+    UChar *dstStart; |
+ |
+    /* do nothing if the caller is already in an error state */ |
+    if(err==NULL || U_FAILURE(*err)) { |
+        return; |
+    } |
+ |
+    /* the converter and both in/out pointers are mandatory */ |
+    if(cnv==NULL || target==NULL || source==NULL) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    srcStart=*source; |
+    dstStart=*target; |
+ |
+    /* |
+     * A limit equal to U_MAX_PTR is pulled back by one byte: this avoids an |
+     * infinite loop at that limit and lets the alignment check below pass, |
+     * since the limit is expected to sit on a UChar boundary. |
+     */ |
+    if((const void *)targetLimit==(const void *)U_MAX_PTR(targetLimit)) { |
+        targetLimit=(const UChar *)((const char *)targetLimit-1); |
+    } |
+ |
+    /* |
+     * None of the following should ever be true. |
+     * We report an error rather than clamping the limits because clamping |
+     * would break the guarantee that either the source is consumed or the |
+     * target is filled (unless an error occurs). |
+     */ |
+ |
+    /* 1) a limit must not lie before its start pointer */ |
+    if(sourceLimit<srcStart || targetLimit<dstStart) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    /* |
+     * 2) buffer sizes must stay within the int32_t number range: some code |
+     *    works with sizes (in units or bytes) instead of comparing pointers, |
+     *    and offsets are int32_t values |
+     */ |
+    if( (sourceLimit>srcStart && (size_t)(sourceLimit-srcStart)>(size_t)0x7fffffff) || |
+        (targetLimit>dstStart && (size_t)(targetLimit-dstStart)>(size_t)0x3fffffff) |
+    ) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    /* |
+     * 3) an odd byte distance means the caller passed a miscast pointer |
+     *    and an incomplete UChar code unit |
+     */ |
+    if((((const char *)targetLimit-(const char *)dstStart)&1)!=0) { |
+        *err=U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    /* emit output left over from a previous call before converting more */ |
+    if(cnv->UCharErrorBufferLength>0) { |
+        if(ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)) { |
+            /* still does not fit: U_BUFFER_OVERFLOW_ERROR */ |
+            return; |
+        } |
+    } |
+    /* from here on read *target, not dstStart: the target may have moved */ |
+ |
+    /* no flush, no new input, no replay pending: nothing more to do */ |
+    if(!flush && srcStart==sourceLimit && cnv->preToULength>=0) { |
+        return; |
+    } |
+ |
+    /* |
+     * A full target (with !flush) is deliberately not reported as an |
+     * overflow here: the remaining source might produce no output at all, |
+     * for example when the skip callback is invoked. |
+     */ |
+ |
+    /* package everything for the conversion driver */ |
+    args.size=sizeof(args); |
+    args.converter=cnv; |
+    args.flush=flush; |
+    args.offsets=offsets; |
+    args.source=srcStart; |
+    args.sourceLimit=sourceLimit; |
+    args.target=*target; |
+    args.targetLimit=targetLimit; |
+ |
+    _toUnicodeWithCallback(&args, err); |
+ |
+    /* report progress back to the caller */ |
+    *source=args.source; |
+    *target=args.target; |
+} |
+ |
+/* ucnv_to/fromUChars() ----------------------------------------------------- */ |
+ |
+/* |
+ * One-shot conversion of a UChar string to bytes. |
+ * |
+ * The converter's fromUnicode state is reset first. If the output does not |
+ * fit into dest, conversion continues into a scratch buffer only to count |
+ * the remaining bytes, so that the full required length is returned |
+ * (preflighting). |
+ * |
+ * @param cnv          the converter; must not be NULL |
+ * @param dest         output buffer; may be NULL only if destCapacity==0 |
+ * @param destCapacity size of dest in bytes; must be >=0 |
+ * @param src          input string; may be NULL only if srcLength==0 |
+ * @param srcLength    number of UChars, or -1 if src is NUL-terminated |
+ * @param pErrorCode   in/out error code |
+ * @return the result of u_terminateChars() for the total output length; |
+ *         0 on an argument error or if *pErrorCode was already a failure |
+ */ |
+U_CAPI int32_t U_EXPORT2 |
+ucnv_fromUChars(UConverter *cnv, |
+                char *dest, int32_t destCapacity, |
+                const UChar *src, int32_t srcLength, |
+                UErrorCode *pErrorCode) { |
+    int32_t total=0; |
+ |
+    /* bail out on a pre-existing error */ |
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
+        return 0; |
+    } |
+ |
+    /* validate the remaining arguments */ |
+    if(cnv==NULL) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+    if(srcLength<-1 || (srcLength!=0 && src==NULL)) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+ |
+    /* start from a clean converter state */ |
+    ucnv_resetFromUnicode(cnv); |
+ |
+    if(srcLength==-1) { |
+        /* NUL-terminated input: measure it */ |
+        srcLength=u_strlen(src); |
+    } |
+ |
+    if(srcLength>0) { |
+        char scratch[1024]; |
+        const UChar *srcEnd=src+srcLength; |
+        char *out=dest; |
+        char *outEnd=dest+destCapacity; |
+ |
+        /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ |
+        if(outEnd<dest || (outEnd==NULL && dest!=NULL)) { |
+            outEnd=(char *)U_MAX_PTR(dest); |
+        } |
+ |
+        /* the real conversion, straight into the caller's buffer */ |
+        ucnv_fromUnicode(cnv, &out, outEnd, &src, srcEnd, NULL, TRUE, pErrorCode); |
+        total=(int32_t)(out-dest); |
+ |
+        /* |
+         * preflighting: while the output overflows, keep converting into |
+         * the scratch buffer and only add up the lengths |
+         */ |
+        while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
+            out=scratch; |
+            *pErrorCode=U_ZERO_ERROR; |
+            ucnv_fromUnicode(cnv, &out, scratch+sizeof(scratch), &src, srcEnd, NULL, TRUE, pErrorCode); |
+            total+=(int32_t)(out-scratch); |
+        } |
+    } |
+ |
+    return u_terminateChars(dest, destCapacity, total, pErrorCode); |
+} |
+ |
+/* |
+ * One-shot conversion of a byte string to UChars. |
+ * |
+ * The converter's toUnicode state is reset first. If the output does not |
+ * fit into dest, conversion continues into a scratch buffer only to count |
+ * the remaining UChars, so that the full required length is returned |
+ * (preflighting). |
+ * |
+ * @param cnv          the converter; must not be NULL |
+ * @param dest         output buffer; may be NULL only if destCapacity==0 |
+ * @param destCapacity size of dest in UChars; must be >=0 |
+ * @param src          input bytes; may be NULL only if srcLength==0 |
+ * @param srcLength    number of bytes, or -1 if src is NUL-terminated |
+ * @param pErrorCode   in/out error code |
+ * @return the result of u_terminateUChars() for the total output length; |
+ *         0 on an argument error or if *pErrorCode was already a failure |
+ */ |
+U_CAPI int32_t U_EXPORT2 |
+ucnv_toUChars(UConverter *cnv, |
+              UChar *dest, int32_t destCapacity, |
+              const char *src, int32_t srcLength, |
+              UErrorCode *pErrorCode) { |
+    int32_t total=0; |
+ |
+    /* bail out on a pre-existing error */ |
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
+        return 0; |
+    } |
+ |
+    /* validate the remaining arguments */ |
+    if(cnv==NULL) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+    if(srcLength<-1 || (srcLength!=0 && src==NULL)) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+ |
+    /* start from a clean converter state */ |
+    ucnv_resetToUnicode(cnv); |
+ |
+    if(srcLength==-1) { |
+        /* NUL-terminated input: measure it */ |
+        srcLength=(int32_t)uprv_strlen(src); |
+    } |
+ |
+    if(srcLength>0) { |
+        UChar scratch[1024]; |
+        const char *srcEnd=src+srcLength; |
+        UChar *out=dest; |
+        UChar *outEnd=dest+destCapacity; |
+ |
+        /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ |
+        if(outEnd<dest || (outEnd==NULL && dest!=NULL)) { |
+            outEnd=(UChar *)U_MAX_PTR(dest); |
+        } |
+ |
+        /* the real conversion, straight into the caller's buffer */ |
+        ucnv_toUnicode(cnv, &out, outEnd, &src, srcEnd, NULL, TRUE, pErrorCode); |
+        total=(int32_t)(out-dest); |
+ |
+        /* |
+         * preflighting: while the output overflows, keep converting into |
+         * the scratch buffer and only add up the lengths |
+         */ |
+        while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
+            out=scratch; |
+            *pErrorCode=U_ZERO_ERROR; |
+            ucnv_toUnicode(cnv, &out, scratch+sizeof(scratch)/U_SIZEOF_UCHAR, &src, srcEnd, NULL, TRUE, pErrorCode); |
+            total+=(int32_t)(out-scratch); |
+        } |
+    } |
+ |
+    return u_terminateUChars(dest, destCapacity, total, pErrorCode); |
+} |
+ |
+/* ucnv_getNextUChar() ------------------------------------------------------ */ |
+/* |
+ * Convert and return the next code point from the source bytes. |
+ * |
+ * Output still pending in cnv->UCharErrorBuffer is returned first. |
+ * Otherwise the converter's native getNextUChar() implementation is tried |
+ * (only when cnv->toULength==0 and the implementation provides one); |
+ * if that does not yield a code point, _toUnicodeWithCallback() converts |
+ * into a small local buffer, always with flush==TRUE. |
+ * A lead surrogate is combined with a following trail surrogate into a |
+ * supplementary code point when one is available; UChars that were |
+ * converted but not returned are put back at the front of |
+ * cnv->UCharErrorBuffer. |
+ * |
+ * @param cnv the converter; must not be NULL |
+ * @param source in/out pointer to the current input position; must not be NULL |
+ * @param sourceLimit end of the input; must not be before *source |
+ * @param err in/out error code; set to U_INDEX_OUTOFBOUNDS_ERROR when |
+ * no output was produced, U_ILLEGAL_ARGUMENT_ERROR for bad arguments |
+ * @return the next code point, or 0xffff when there is no output |
+ * (including when err==NULL or *err was already a failure). |
+ * If only the second conversion step (looking for a trail surrogate) |
+ * fails, the lead surrogate is returned together with the failure in *err. |
+ */ |
+U_CAPI UChar32 U_EXPORT2 |
+ucnv_getNextUChar(UConverter *cnv, |
+ const char **source, const char *sourceLimit, |
+ UErrorCode *err) { |
+ UConverterToUnicodeArgs args; |
+ UChar buffer[U16_MAX_LENGTH]; /* local conversion target: room for one code point in UTF-16 */ |
+ const char *s; |
+ UChar32 c; |
+ int32_t i, length; /* buffer[i..length[ is converted output not yet returned */ |
+ |
+ /* check parameters */ |
+ if(err==NULL || U_FAILURE(*err)) { |
+ return 0xffff; |
+ } |
+ |
+ if(cnv==NULL || source==NULL) { |
+ *err=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0xffff; |
+ } |
+ |
+ s=*source; |
+ if(sourceLimit<s) { |
+ *err=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0xffff; |
+ } |
+ |
+ /* |
+ * Make sure that the buffer sizes do not exceed the number range for |
+ * int32_t because some functions use the size (in units or bytes) |
+ * rather than comparing pointers, and because offsets are int32_t values. |
+ * |
+ * size_t is guaranteed to be unsigned and large enough for the job. |
+ * |
+ * Return with an error instead of adjusting the limits because we would |
+ * not be able to maintain the semantics that either the source must be |
+ * consumed or the target filled (unless an error occurs). |
+ * An adjustment would be sourceLimit=s+0x7fffffff; for example. |
+ */ |
+ if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { |
+ *err=U_ILLEGAL_ARGUMENT_ERROR; |
+ return 0xffff; |
+ } |
+ |
+ c=U_SENTINEL; /* stays negative unless the overflow buffer supplies a UChar below */ |
+ |
+ /* flush the target overflow buffer */ |
+ if(cnv->UCharErrorBufferLength>0) { |
+ UChar *overflow; |
+ |
+ overflow=cnv->UCharErrorBuffer; |
+ i=0; |
+ length=cnv->UCharErrorBufferLength; |
+ U16_NEXT(overflow, i, length, c); |
+ |
+ /* move the remaining overflow contents up to the beginning */ |
+ if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { |
+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, |
+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); |
+ } |
+ |
+ if(!U16_IS_LEAD(c) || i<length) { |
+ return c; |
+ } |
+ /* |
+ * Continue if the overflow buffer contained only a lead surrogate, |
+ * in case the converter outputs single surrogates from complete |
+ * input sequences. |
+ */ |
+ } |
+ |
+ /* |
+ * flush==TRUE is implied for ucnv_getNextUChar() |
+ * |
+ * do not simply return even if s==sourceLimit because the converter may |
+ * not have seen flush==TRUE before |
+ */ |
+ |
+ /* prepare the converter arguments */ |
+ args.converter=cnv; |
+ args.flush=TRUE; |
+ args.offsets=NULL; |
+ args.source=s; |
+ args.sourceLimit=sourceLimit; |
+ args.target=buffer; |
+ args.targetLimit=buffer+1; /* ask for a single UChar first */ |
+ args.size=sizeof(args); |
+ |
+ if(c<0) { |
+ /* |
+ * call the native getNextUChar() implementation if we are |
+ * at a character boundary (toULength==0) |
+ * |
+ * unlike with _toUnicode(), getNextUChar() implementations must set |
+ * U_TRUNCATED_CHAR_FOUND for truncated input, |
+ * in addition to setting toULength/toUBytes[] |
+ */ |
+ if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { |
+ c=cnv->sharedData->impl->getNextUChar(&args, err); |
+ *source=s=args.source; |
+ if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { |
+ /* reset the converter without calling the callback function */ |
+ _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); |
+ return 0xffff; /* no output */ |
+ } else if(U_SUCCESS(*err) && c>=0) { |
+ return c; |
+ /* |
+ * else fall through to use _toUnicode() because |
+ * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all |
+ * U_FAILURE: call _toUnicode() for callback handling (do not output c) |
+ */ |
+ } |
+ } |
+ |
+ /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ |
+ _toUnicodeWithCallback(&args, err); |
+ |
+ if(*err==U_BUFFER_OVERFLOW_ERROR) { /* expected with a one-UChar target; surplus output is in the overflow buffer */ |
+ *err=U_ZERO_ERROR; |
+ } |
+ |
+ i=0; |
+ length=(int32_t)(args.target-buffer); |
+ } else { |
+ /* write the lead surrogate from the overflow buffer */ |
+ buffer[0]=(UChar)c; |
+ args.target=buffer+1; |
+ i=0; |
+ length=1; |
+ } |
+ |
+ /* buffer contents starts at i and ends before length */ |
+ |
+ if(U_FAILURE(*err)) { |
+ c=0xffff; /* no output */ |
+ } else if(length==0) { |
+ /* no input or only state changes */ |
+ *err=U_INDEX_OUTOFBOUNDS_ERROR; |
+ /* no need to reset explicitly because _toUnicodeWithCallback() did it */ |
+ c=0xffff; /* no output */ |
+ } else { |
+ c=buffer[0]; |
+ i=1; |
+ if(!U16_IS_LEAD(c)) { |
+ /* consume c=buffer[0], done */ |
+ } else { |
+ /* got a lead surrogate, see if a trail surrogate follows */ |
+ UChar c2; |
+ |
+ if(cnv->UCharErrorBufferLength>0) { |
+ /* got overflow output from the conversion */ |
+ if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { |
+ /* got a trail surrogate, too */ |
+ c=U16_GET_SUPPLEMENTARY(c, c2); |
+ |
+ /* move the remaining overflow contents up to the beginning */ |
+ if((--cnv->UCharErrorBufferLength)>0) { |
+ uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, |
+ cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); |
+ } |
+ } else { |
+ /* c is an unpaired lead surrogate, just return it */ |
+ } |
+ } else if(args.source<sourceLimit) { |
+ /* convert once more, to buffer[1] */ |
+ args.targetLimit=buffer+2; |
+ _toUnicodeWithCallback(&args, err); |
+ if(*err==U_BUFFER_OVERFLOW_ERROR) { |
+ *err=U_ZERO_ERROR; |
+ } |
+ |
+ length=(int32_t)(args.target-buffer); |
+ if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { |
+ /* got a trail surrogate, too */ |
+ c=U16_GET_SUPPLEMENTARY(c, c2); |
+ i=2; |
+ } |
+ } |
+ } |
+ } |
+ |
+ /* |
+ * move leftover output from buffer[i..length[ |
+ * into the beginning of the overflow buffer |
+ */ |
+ if(i<length) { |
+ /* move further overflow back */ |
+ int32_t delta=length-i; /* number of leftover UChars in buffer[] (1 or 2) */ |
+ if((length=cnv->UCharErrorBufferLength)>0) { |
+ uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, |
+ length*U_SIZEOF_UCHAR); |
+ } |
+ cnv->UCharErrorBufferLength=(int8_t)(length+delta); |
+ |
+ cnv->UCharErrorBuffer[0]=buffer[i++]; |
+ if(delta>1) { |
+ cnv->UCharErrorBuffer[1]=buffer[i]; |
+ } |
+ } |
+ |
+ *source=args.source; |
+ return c; |
+} |
+ |
+/* ucnv_convert() and siblings ---------------------------------------------- */ |
+ |
+U_CAPI void U_EXPORT2 |
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, |
+ char **target, const char *targetLimit, |
+ const char **source, const char *sourceLimit, |
+ UChar *pivotStart, UChar **pivotSource, |
+ UChar **pivotTarget, const UChar *pivotLimit, |
+ UBool reset, UBool flush, |
+ UErrorCode *pErrorCode) { |
+ UChar pivotBuffer[CHUNK_SIZE]; |
+ const UChar *myPivotSource; |
+ UChar *myPivotTarget; |
+ const char *s; |
+ char *t; |
+ |
+ UConverterToUnicodeArgs toUArgs; |
+ UConverterFromUnicodeArgs fromUArgs; |
+ UConverterConvert convert; |
+ |
+ /* error checking */ |
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
+ return; |
+ } |
+ |
+ if( targetCnv==NULL || sourceCnv==NULL || |
+ source==NULL || *source==NULL || |
+ target==NULL || *target==NULL || targetLimit==NULL |
+ ) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return; |
+ } |
+ |
+ s=*source; |
+ t=*target; |
+ if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return; |
+ } |
+ |
+ /* |
+ * Make sure that the buffer sizes do not exceed the number range for |
+ * int32_t. See ucnv_toUnicode() for a more detailed comment. |
+ */ |
+ if( |
+ (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || |
+ ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) |
+ ) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return; |
+ } |
+ |
+ if(pivotStart==NULL) { |
+ if(!flush) { |
+ /* streaming conversion requires an explicit pivot buffer */ |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return; |
+ } |
+ |
+ /* use the stack pivot buffer */ |
+ myPivotSource=myPivotTarget=pivotStart=pivotBuffer; |
+ pivotSource=(UChar **)&myPivotSource; |
+ pivotTarget=&myPivotTarget; |
+ pivotLimit=pivotBuffer+CHUNK_SIZE; |
+ } else if( pivotStart>=pivotLimit || |
+ pivotSource==NULL || *pivotSource==NULL || |
+ pivotTarget==NULL || *pivotTarget==NULL || |
+ pivotLimit==NULL |
+ ) { |
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+ return; |
+ } |
+ |
+ if(sourceLimit==NULL) { |
+ /* get limit of single-byte-NUL-terminated source string */ |
+ sourceLimit=uprv_strchr(*source, 0); |
+ } |
+ |
+ if(reset) { |
+ ucnv_resetToUnicode(sourceCnv); |
+ ucnv_resetFromUnicode(targetCnv); |
+ *pivotSource=*pivotTarget=pivotStart; |
+ } else if(targetCnv->charErrorBufferLength>0) { |
+ /* output the targetCnv overflow buffer */ |
+ if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { |
+ /* U_BUFFER_OVERFLOW_ERROR */ |
+ return; |
+ } |
+ /* *target has moved, therefore stop using t */ |
+ |
+ if( !flush && |
+ targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && |
+ sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit |
+ ) { |
+ /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ |
+ return; |
+ } |
+ } |
+ |
+ /* Is direct-UTF-8 conversion available? */ |
+ if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && |
+ targetCnv->sharedData->impl->fromUTF8!=NULL |
+ ) { |
+ convert=targetCnv->sharedData->impl->fromUTF8; |
+ } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && |
+ sourceCnv->sharedData->impl->toUTF8!=NULL |
+ ) { |
+ convert=sourceCnv->sharedData->impl->toUTF8; |
+ } else { |
+ convert=NULL; |
+ } |
+ |
+ /* |
+ * If direct-UTF-8 conversion is available, then we use a smaller |
+ * pivot buffer for error handling and partial matches |
+ * so that we quickly return to direct conversion. |
+ * |
+ * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. |
+ * |
+ * We could reduce the pivot buffer size further, at the cost of |
+ * buffer overflows from callbacks. |
+ * The pivot buffer should not be smaller than the maximum number of |
+ * fromUnicode extension table input UChars |
+ * (for m:n conversion, see |
+ * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) |
+ * or 2 for surrogate pairs. |
+ * |
+ * Too small a buffer can cause thrashing between pivoting and direct |
+ * conversion, with function call overhead outweighing the benefits |
+ * of direct conversion. |
+ */ |
+ if(convert!=NULL && (pivotLimit-pivotStart)>32) { |
+ pivotLimit=pivotStart+32; |
+ } |
+ |
+ /* prepare the converter arguments */ |
+ fromUArgs.converter=targetCnv; |
+ fromUArgs.flush=FALSE; |
+ fromUArgs.offsets=NULL; |
+ fromUArgs.target=*target; |
+ fromUArgs.targetLimit=targetLimit; |
+ fromUArgs.size=sizeof(fromUArgs); |
+ |
+ toUArgs.converter=sourceCnv; |
+ toUArgs.flush=flush; |
+ toUArgs.offsets=NULL; |
+ toUArgs.source=s; |
+ toUArgs.sourceLimit=sourceLimit; |
+ toUArgs.targetLimit=pivotLimit; |
+ toUArgs.size=sizeof(toUArgs); |
+ |
+ /* |
+ * TODO: Consider separating this function into two functions, |
+ * extracting exactly the conversion loop, |
+ * for readability and to reduce the set of visible variables. |
+ * |
+ * Otherwise stop using s and t from here on. |
+ */ |
+ s=t=NULL; |
+ |
+ /* |
+ * conversion loop |
+ * |
+ * The sequence of steps in the loop may appear backward, |
+ * but the principle is simple: |
+ * In the chain of |
+ * source - sourceCnv overflow - pivot - targetCnv overflow - target |
+ * empty out later buffers before refilling them from earlier ones. |
+ * |
+ * The targetCnv overflow buffer is flushed out only once before the loop. |
+ */ |
+ for(;;) { |
+ /* |
+ * if(pivot not empty or error or replay or flush fromUnicode) { |
+ * fromUnicode(pivot -> target); |
+ * } |
+ * |
+ * For pivoting conversion; and for direct conversion for |
+ * error callback handling and flushing the replay buffer. |
+ */ |
+ if( *pivotSource<*pivotTarget || |
+ U_FAILURE(*pErrorCode) || |
+ targetCnv->preFromULength<0 || |
+ fromUArgs.flush |
+ ) { |
+ fromUArgs.source=*pivotSource; |
+ fromUArgs.sourceLimit=*pivotTarget; |
+ _fromUnicodeWithCallback(&fromUArgs, pErrorCode); |
+ if(U_FAILURE(*pErrorCode)) { |
+ /* target overflow, or conversion error */ |
+ *pivotSource=(UChar *)fromUArgs.source; |
+ break; |
+ } |
+ |
+ /* |
+ * _fromUnicodeWithCallback() must have consumed the pivot contents |
+ * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() |
+ */ |
+ } |
+ |
+ /* The pivot buffer is empty; reset it so we start at pivotStart. */ |
+ *pivotSource=*pivotTarget=pivotStart; |
+ |
+ /* |
+ * if(sourceCnv overflow buffer not empty) { |
+ * move(sourceCnv overflow buffer -> pivot); |
+ * continue; |
+ * } |
+ */ |
+ /* output the sourceCnv overflow buffer */ |
+ if(sourceCnv->UCharErrorBufferLength>0) { |
+ if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { |
+ /* U_BUFFER_OVERFLOW_ERROR */ |
+ *pErrorCode=U_ZERO_ERROR; |
+ } |
+ continue; |
+ } |
+ |
+ /* |
+ * check for end of input and break if done |
+ * |
+ * Checking both flush and fromUArgs.flush ensures that the converters |
+ * have been called with the flush flag set if the ucnv_convertEx() |
+ * caller set it. |
+ */ |
+ if( toUArgs.source==sourceLimit && |
+ sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && |
+ (!flush || fromUArgs.flush) |
+ ) { |
+ /* done successfully */ |
+ break; |
+ } |
+ |
+ /* |
+ * use direct conversion if available |
+ * but not if continuing a partial match |
+ * or flushing the toUnicode replay buffer |
+ */ |
+ if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { |
+ if(*pErrorCode==U_USING_DEFAULT_WARNING) { |
+ /* remove a warning that may be set by this function */ |
+ *pErrorCode=U_ZERO_ERROR; |
+ } |
+ convert(&fromUArgs, &toUArgs, pErrorCode); |
+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
+ break; |
+ } else if(U_FAILURE(*pErrorCode)) { |
+ if(sourceCnv->toULength>0) { |
+ /* |
+ * Fall through to calling _toUnicodeWithCallback() |
+ * for callback handling. |
+ * |
+ * The pivot buffer will be reset with |
+ * *pivotSource=*pivotTarget=pivotStart; |
+ * which indicates a toUnicode error to the caller |
+ * (*pivotSource==pivotStart shows no pivot UChars consumed). |
+ */ |
+ } else { |
+ /* |
+ * Indicate a fromUnicode error to the caller |
+ * (*pivotSource>pivotStart shows some pivot UChars consumed). |
+ */ |
+ *pivotSource=*pivotTarget=pivotStart+1; |
+ /* |
+ * Loop around to calling _fromUnicodeWithCallback() |
+ * for callback handling. |
+ */ |
+ continue; |
+ } |
+ } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { |
+ /* |
+ * No error, but the implementation requested to temporarily |
+ * fall back to pivoting. |
+ */ |
+ *pErrorCode=U_ZERO_ERROR; |
+ /* |
+ * The following else branches are almost identical to the end-of-input |
+ * handling in _toUnicodeWithCallback(). |
+ * Avoid calling it just for the end of input. |
+ */ |
+ } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ |
+ /* |
+ * the entire input stream is consumed |
+ * and there is a partial, truncated input sequence left |
+ */ |
+ |
+ /* inject an error and continue with callback handling */ |
+ *pErrorCode=U_TRUNCATED_CHAR_FOUND; |
+ } else { |
+ /* input consumed */ |
+ if(flush) { |
+ /* reset the converters without calling the callback functions */ |
+ _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); |
+ _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); |
+ } |
+ |
+ /* done successfully */ |
+ break; |
+ } |
+ } |
+ |
+ /* |
+ * toUnicode(source -> pivot); |
+ * |
+ * For pivoting conversion; and for direct conversion for |
+ * error callback handling, continuing partial matches |
+ * and flushing the replay buffer. |
+ * |
+ * The pivot buffer is empty and reset. |
+ */ |
+ toUArgs.target=pivotStart; /* ==*pivotTarget */ |
+ /* toUArgs.targetLimit=pivotLimit; already set before the loop */ |
+ _toUnicodeWithCallback(&toUArgs, pErrorCode); |
+ *pivotTarget=toUArgs.target; |
+ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
+ /* pivot overflow: continue with the conversion loop */ |
+ *pErrorCode=U_ZERO_ERROR; |
+ } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { |
+ /* conversion error, or there was nothing left to convert */ |
+ break; |
+ } |
+ /* |
+ * else: |
+ * _toUnicodeWithCallback() wrote into the pivot buffer, |
+ * continue with fromUnicode conversion. |
+ * |
+ * Set the fromUnicode flush flag if we flush and if toUnicode has |
+ * processed the end of the input. |
+ */ |
+ if( flush && toUArgs.source==sourceLimit && |
+ sourceCnv->preToULength>=0 && |
+ sourceCnv->UCharErrorBufferLength==0 |
+ ) { |
+ fromUArgs.flush=TRUE; |
+ } |
+ } |
+ |
+ /* |
+ * The conversion loop is exited when one of the following is true: |
+ * - the entire source text has been converted successfully to the target buffer |
+ * - a target buffer overflow occurred |
+ * - a conversion error occurred |
+ */ |
+ |
+ *source=toUArgs.source; |
+ *target=fromUArgs.target; |
+ |
+ /* terminate the target buffer if possible */ |
+ if(flush && U_SUCCESS(*pErrorCode)) { |
+ if(*target!=targetLimit) { |
+ **target=0; |
+ if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { |
+ *pErrorCode=U_ZERO_ERROR; |
+ } |
+ } else { |
+ *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; |
+ } |
+ } |
+} |
+ |
+ /* |
+  * Internal worker for ucnv_convert() and ucnv_convertAlgorithmic(), with preflighting. |
+  * |
+  * Converts source (sourceLength bytes, or NUL-terminated if sourceLength<0) |
+  * from inConverter to outConverter through ucnv_convertEx() and a local pivot buffer. |
+  * If target overflows, or targetCapacity==0, the rest of the input is converted |
+  * into a scratch buffer only to count how many output bytes are needed. |
+  * |
+  * Returns the number of output bytes written or required. |
+  * Arguments are not validated here. |
+  */ |
+ static int32_t |
+ ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, |
+                      char *target, int32_t targetCapacity, |
+                      const char *source, int32_t sourceLength, |
+                      UErrorCode *pErrorCode) { |
+     UChar pivotBuffer[CHUNK_SIZE]; |
+     UChar *pivotRead, *pivotWrite; |
+ |
+     const char *sourceEnd; |
+     char *out; |
+     int32_t total; |
+ |
+     /* find the end of the input */ |
+     if(sourceLength>=0) { |
+         sourceEnd=source+sourceLength; |
+     } else { |
+         sourceEnd=uprv_strchr(source, 0); |
+     } |
+ |
+     /* empty input: only terminate the output */ |
+     if(sourceEnd==source) { |
+         return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
+     } |
+ |
+     pivotRead=pivotBuffer; |
+     pivotWrite=pivotBuffer; |
+     out=target; |
+     total=0; |
+ |
+     if(targetCapacity>0) { |
+         /* real conversion into the caller's buffer */ |
+         ucnv_convertEx(outConverter, inConverter, |
+                        &out, target+targetCapacity, |
+                        &source, sourceEnd, |
+                        pivotBuffer, &pivotRead, &pivotWrite, pivotBuffer+CHUNK_SIZE, |
+                        FALSE, |
+                        TRUE, |
+                        pErrorCode); |
+         total=(int32_t)(out-target); |
+     } |
+ |
+     if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR && targetCapacity!=0) { |
+         /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ |
+         return total; |
+     } |
+ |
+     /* |
+      * The output buffer is exhausted (or we are only preflighting): |
+      * stop writing to it, but keep converting into a scratch buffer |
+      * to accumulate the number of bytes that would have been required. |
+      */ |
+     { |
+         char scratch[CHUNK_SIZE]; |
+ |
+         for(;;) { |
+             *pErrorCode=U_ZERO_ERROR; |
+             out=scratch; |
+             ucnv_convertEx(outConverter, inConverter, |
+                            &out, scratch+CHUNK_SIZE, |
+                            &source, sourceEnd, |
+                            pivotBuffer, &pivotRead, &pivotWrite, pivotBuffer+CHUNK_SIZE, |
+                            FALSE, |
+                            TRUE, |
+                            pErrorCode); |
+             total+=(int32_t)(out-scratch); |
+             if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { |
+                 break; |
+             } |
+         } |
+     } |
+ |
+     /* done with preflighting, set warnings and errors as appropriate */ |
+     return u_terminateChars(target, targetCapacity, total, pErrorCode); |
+ } |
+ |
+ /* |
+  * Converts a byte string from the charset named fromConverterName to the |
+  * charset named toConverterName, using two temporary stack-allocated converters. |
+  * |
+  * sourceLength may be -1 for a NUL-terminated source. |
+  * Returns the length reported by ucnv_internalConvert(), or 0 when the |
+  * arguments are invalid or a converter cannot be created. |
+  */ |
+ U_CAPI int32_t U_EXPORT2 |
+ ucnv_convert(const char *toConverterName, const char *fromConverterName, |
+              char *target, int32_t targetCapacity, |
+              const char *source, int32_t sourceLength, |
+              UErrorCode *pErrorCode) { |
+     UConverter fromStorage, toStorage; /* stack-allocated */ |
+     UConverter *fromCnv, *toCnv; |
+     int32_t length; |
+ |
+     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
+         return 0; |
+     } |
+ |
+     /* reject a missing source, a bad length, or a capacity without a buffer */ |
+     if( source==NULL || sourceLength<-1 || |
+         targetCapacity<0 || (target==NULL && targetCapacity>0) |
+     ) { |
+         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+         return 0; |
+     } |
+ |
+     /* empty input: nothing to convert, just terminate the output */ |
+     if(sourceLength==0 || (sourceLength<0 && *source==0)) { |
+         return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
+     } |
+ |
+     /* open the source-side converter */ |
+     fromCnv=ucnv_createConverter(&fromStorage, fromConverterName, pErrorCode); |
+     if(U_FAILURE(*pErrorCode)) { |
+         return 0; |
+     } |
+ |
+     /* open the target-side converter; release the first one on failure */ |
+     toCnv=ucnv_createConverter(&toStorage, toConverterName, pErrorCode); |
+     if(U_FAILURE(*pErrorCode)) { |
+         ucnv_close(fromCnv); |
+         return 0; |
+     } |
+ |
+     length=ucnv_internalConvert(toCnv, fromCnv, |
+                                 target, targetCapacity, |
+                                 source, sourceLength, |
+                                 pErrorCode); |
+ |
+     ucnv_close(fromCnv); |
+     ucnv_close(toCnv); |
+ |
+     return length; |
+ } |
+ |
+ /* |
+  * @internal |
+  * Common implementation of ucnv_toAlgorithmic() and ucnv_fromAlgorithmic(). |
+  * |
+  * Creates a temporary stack-allocated algorithmic converter of type algorithmicType |
+  * and converts between it and cnv: |
+  * convertToAlgorithmic!=0: cnv -> Unicode -> algorithmic |
+  * convertToAlgorithmic==0: algorithmic -> Unicode -> cnv |
+  * The side of cnv that is used is reset first. |
+  * Returns the length reported by ucnv_internalConvert(), or 0 on argument |
+  * errors or if the algorithmic converter cannot be created. |
+  */ |
+ static int32_t |
+ ucnv_convertAlgorithmic(UBool convertToAlgorithmic, |
+                         UConverterType algorithmicType, |
+                         UConverter *cnv, |
+                         char *target, int32_t targetCapacity, |
+                         const char *source, int32_t sourceLength, |
+                         UErrorCode *pErrorCode) { |
+     UConverter algoStorage; /* stack-allocated */ |
+     UConverter *algo; |
+     int32_t length; |
+ |
+     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
+         return 0; |
+     } |
+ |
+     if( cnv==NULL || source==NULL || sourceLength<-1 || |
+         targetCapacity<0 || (target==NULL && targetCapacity>0) |
+     ) { |
+         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+         return 0; |
+     } |
+ |
+     /* empty input: nothing to convert, just terminate the output */ |
+     if(sourceLength==0 || (sourceLength<0 && *source==0)) { |
+         return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
+     } |
+ |
+     /* create the algorithmic converter */ |
+     algo=ucnv_createAlgorithmicConverter(&algoStorage, algorithmicType, |
+                                          "", 0, pErrorCode); |
+     if(U_FAILURE(*pErrorCode)) { |
+         return 0; |
+     } |
+ |
+     if(convertToAlgorithmic) { |
+         /* cnv->Unicode->algo: cnv is the source side */ |
+         ucnv_resetToUnicode(cnv); |
+         length=ucnv_internalConvert(algo, cnv, |
+                                     target, targetCapacity, |
+                                     source, sourceLength, |
+                                     pErrorCode); |
+     } else { |
+         /* algo->Unicode->cnv: cnv is the target side */ |
+         ucnv_resetFromUnicode(cnv); |
+         length=ucnv_internalConvert(cnv, algo, |
+                                     target, targetCapacity, |
+                                     source, sourceLength, |
+                                     pErrorCode); |
+     } |
+ |
+     ucnv_close(algo); |
+ |
+     return length; |
+ } |
+ |
+ /* |
+  * Converts from the charset of cnv to the algorithmic charset algorithmicType. |
+  * Thin wrapper around ucnv_convertAlgorithmic() with the direction flag set. |
+  */ |
+ U_CAPI int32_t U_EXPORT2 |
+ ucnv_toAlgorithmic(UConverterType algorithmicType, |
+                    UConverter *cnv, |
+                    char *target, int32_t targetCapacity, |
+                    const char *source, int32_t sourceLength, |
+                    UErrorCode *pErrorCode) { |
+     int32_t length; |
+ |
+     length=ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, |
+                                    target, targetCapacity, |
+                                    source, sourceLength, |
+                                    pErrorCode); |
+     return length; |
+ } |
+ |
+ /* |
+  * Converts from the algorithmic charset algorithmicType to the charset of cnv. |
+  * Thin wrapper around ucnv_convertAlgorithmic() with the direction flag cleared. |
+  */ |
+ U_CAPI int32_t U_EXPORT2 |
+ ucnv_fromAlgorithmic(UConverter *cnv, |
+                      UConverterType algorithmicType, |
+                      char *target, int32_t targetCapacity, |
+                      const char *source, int32_t sourceLength, |
+                      UErrorCode *pErrorCode) { |
+     int32_t length; |
+ |
+     length=ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, |
+                                    target, targetCapacity, |
+                                    source, sourceLength, |
+                                    pErrorCode); |
+     return length; |
+ } |
+ |
+ /* |
+  * Returns the conversion type recorded in the converter's static data. |
+  * When legacy conversion is compiled in, a UCNV_MBCS converter is refined |
+  * through ucnv_MBCSGetType(). |
+  * The converter pointer is not checked for NULL. |
+  */ |
+ U_CAPI UConverterType U_EXPORT2 |
+ ucnv_getType(const UConverter* converter) |
+ { |
+     const int8_t staticType = converter->sharedData->staticData->conversionType; |
+ |
+ #if !UCONFIG_NO_LEGACY_CONVERSION |
+     if(UCNV_MBCS == staticType) { |
+         return ucnv_MBCSGetType(converter); |
+     } |
+ #endif |
+ |
+     return (UConverterType)staticType; |
+ } |
+ |
+ /* |
+  * Fills starters[256] by delegating to the converter implementation's |
+  * getStarters function. |
+  * |
+  * Sets *err to U_ILLEGAL_ARGUMENT_ERROR if converter or starters is NULL, |
+  * or if the implementation provides no getStarters function. |
+  * Does nothing if err is NULL or already indicates a failure. |
+  */ |
+ U_CAPI void U_EXPORT2 |
+ ucnv_getStarters(const UConverter* converter, |
+                  UBool starters[256], |
+                  UErrorCode* err) |
+ { |
+     if (err == NULL || U_FAILURE(*err)) { |
+         return; |
+     } |
+ |
+     /* validate the pointers before dereferencing, like ucnv_getInvalidChars() does */ |
+     if (converter == NULL || starters == NULL) { |
+         *err = U_ILLEGAL_ARGUMENT_ERROR; |
+         return; |
+     } |
+ |
+     if(converter->sharedData->impl->getStarters != NULL) { |
+         converter->sharedData->impl->getStarters(converter, starters, err); |
+     } else { |
+         /* this converter type has no notion of starter bytes */ |
+         *err = U_ILLEGAL_ARGUMENT_ERROR; |
+     } |
+ } |
+ |
+ /* |
+  * Looks up the converter's name (from ucnv_getName()) in the |
+  * ambiguousConverters table. |
+  * Returns the matching table entry, or NULL if cnv is NULL, the name |
+  * cannot be retrieved, or the name is not listed. |
+  */ |
+ static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) |
+ { |
+     const UAmbiguousConverter *entry, *tableEnd; |
+     const char *cnvName; |
+     UErrorCode status; |
+ |
+     if(cnv==NULL) { |
+         return NULL; |
+     } |
+ |
+     status=U_ZERO_ERROR; |
+     cnvName=ucnv_getName(cnv, &status); |
+     if(U_FAILURE(status)) { |
+         return NULL; |
+     } |
+ |
+     /* linear search with exact name comparison */ |
+     tableEnd=ambiguousConverters+(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); |
+     for(entry=ambiguousConverters; entry<tableEnd; ++entry) { |
+         if(uprv_strcmp(cnvName, entry->name)==0) { |
+             return entry; |
+         } |
+     } |
+ |
+     return NULL; |
+ } |
+ |
+ /* |
+  * For a converter listed in ambiguousConverters, replaces in place every |
+  * occurrence of the table's variant5c code unit in source[0..sourceLength) |
+  * with U+005C (backslash). |
+  * Does nothing for NULL arguments, a non-positive length, or an unlisted converter. |
+  */ |
+ U_CAPI void U_EXPORT2 |
+ ucnv_fixFileSeparator(const UConverter *cnv, |
+                       UChar* source, |
+                       int32_t sourceLength) { |
+     const UAmbiguousConverter *entry; |
+     UChar *p, *end; |
+     UChar variant; |
+ |
+     if(cnv==NULL || source==NULL || sourceLength<=0) { |
+         return; |
+     } |
+ |
+     entry=ucnv_getAmbiguous(cnv); |
+     if(entry==NULL) { |
+         return; |
+     } |
+ |
+     variant=entry->variant5c; |
+     end=source+sourceLength; |
+     for(p=source; p<end; ++p) { |
+         if(*p==variant) { |
+             *p=0x5c; |
+         } |
+     } |
+ } |
+ |
+ /* |
+  * Returns TRUE if the converter is listed in the ambiguousConverters table |
+  * (see ucnv_getAmbiguous()), FALSE otherwise, including for a NULL converter. |
+  */ |
+ U_CAPI UBool U_EXPORT2 |
+ ucnv_isAmbiguous(const UConverter *cnv) { |
+     const UAmbiguousConverter *entry=ucnv_getAmbiguous(cnv); |
+ |
+     return (UBool)(entry!=NULL); |
+ } |
+ |
+ /* |
+  * Stores usesFallback in the converter's useFallback field. |
+  * The converter pointer is not checked for NULL. |
+  */ |
+ U_CAPI void U_EXPORT2 |
+ ucnv_setFallback(UConverter *cnv, UBool usesFallback) { |
+     cnv->useFallback=usesFallback; |
+ } |
+ |
+ /* |
+  * Returns the converter's useFallback field as set by ucnv_setFallback(). |
+  * The converter pointer is not checked for NULL. |
+  */ |
+ U_CAPI UBool U_EXPORT2 |
+ ucnv_usesFallback(const UConverter *cnv) { |
+     const UBool fallbackEnabled=cnv->useFallback; |
+ |
+     return fallbackEnabled; |
+ } |
+ |
+ /* |
+  * Copies the converter's invalidCharBuffer into errBytes. |
+  * |
+  * On input *len is the capacity of errBytes; on success it is set to |
+  * the converter's invalidCharLength. |
+  * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL arguments and |
+  * U_INDEX_OUTOFBOUNDS_ERROR if the capacity is too small. |
+  */ |
+ U_CAPI void U_EXPORT2 |
+ ucnv_getInvalidChars (const UConverter * converter, |
+                       char *errBytes, |
+                       int8_t * len, |
+                       UErrorCode * err) |
+ { |
+     if (err == NULL || U_FAILURE(*err)) { |
+         return; |
+     } |
+ |
+     if (converter == NULL || errBytes == NULL || len == NULL) { |
+         *err = U_ILLEGAL_ARGUMENT_ERROR; |
+         return; |
+     } |
+ |
+     /* the caller's buffer must hold the whole invalid sequence */ |
+     if (converter->invalidCharLength > *len) { |
+         *err = U_INDEX_OUTOFBOUNDS_ERROR; |
+         return; |
+     } |
+ |
+     *len = converter->invalidCharLength; |
+     if (*len > 0) { |
+         uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); |
+     } |
+ } |
+ |
+ /* |
+  * Copies the converter's invalidUCharBuffer into errChars. |
+  * |
+  * On input *len is the capacity of errChars in UChars; on success it is |
+  * set to the converter's invalidUCharLength. |
+  * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL arguments and |
+  * U_INDEX_OUTOFBOUNDS_ERROR if the capacity is too small. |
+  */ |
+ U_CAPI void U_EXPORT2 |
+ ucnv_getInvalidUChars (const UConverter * converter, |
+                        UChar *errChars, |
+                        int8_t * len, |
+                        UErrorCode * err) |
+ { |
+     if (err == NULL || U_FAILURE(*err)) { |
+         return; |
+     } |
+ |
+     if (converter == NULL || errChars == NULL || len == NULL) { |
+         *err = U_ILLEGAL_ARGUMENT_ERROR; |
+         return; |
+     } |
+ |
+     /* the caller's buffer must hold the whole invalid sequence */ |
+     if (converter->invalidUCharLength > *len) { |
+         *err = U_INDEX_OUTOFBOUNDS_ERROR; |
+         return; |
+     } |
+ |
+     *len = converter->invalidUCharLength; |
+     if (*len > 0) { |
+         /* byte count = number of UChars times the UChar size */ |
+         uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); |
+     } |
+ } |
+ |
+ /* length in bytes of the longest signature recognized below (UTF-7 "+/v8-") */ |
+ #define SIG_MAX_LEN 5 |
+ |
+ /* |
+  * Detects a Unicode signature (byte order mark) at the start of source. |
+  * |
+  * sourceLength may be -1 for a NUL-terminated source. |
+  * Returns the name of the detected charset and stores the signature length |
+  * in *signatureLength (if not NULL); returns NULL with length 0 when no |
+  * signature is recognized. Returns NULL without touching *signatureLength |
+  * when pErrorCode is NULL or a failure, or when the arguments are invalid |
+  * (then U_ILLEGAL_ARGUMENT_ERROR is set). |
+  */ |
+ U_CAPI const char* U_EXPORT2 |
+ ucnv_detectUnicodeSignature( const char* source, |
+                              int32_t sourceLength, |
+                              int32_t* signatureLength, |
+                              UErrorCode* pErrorCode) { |
+     int32_t unusedLength; |
+ |
+     /* |
+      * Pre-filled with 0xa5 bytes so that positions beyond a short input |
+      * cannot accidentally complete a signature. |
+      */ |
+     char sig[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; |
+     int pos; |
+ |
+     if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ |
+         return NULL; |
+     } |
+ |
+     if(source == NULL || sourceLength < -1){ |
+         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
+         return NULL; |
+     } |
+ |
+     /* let the code below store the length unconditionally */ |
+     if(signatureLength == NULL) { |
+         signatureLength = &unusedLength; |
+     } |
+ |
+     if(sourceLength==-1){ |
+         sourceLength=(int32_t)uprv_strlen(source); |
+     } |
+ |
+     /* copy at most SIG_MAX_LEN leading bytes */ |
+     for(pos=0; pos<sourceLength && pos<SIG_MAX_LEN; ++pos){ |
+         sig[pos]=source[pos]; |
+     } |
+ |
+     /* every signature has a distinct first byte, so dispatch on it */ |
+     switch((unsigned char)sig[0]) { |
+     case 0xFE: |
+         if(sig[1] == '\xFF') { |
+             *signatureLength=2; |
+             return "UTF-16BE"; |
+         } |
+         break; |
+     case 0xFF: |
+         if(sig[1] == '\xFE') { |
+             /* FF FE 00 00 is UTF-32LE; a bare FF FE is UTF-16LE */ |
+             if(sig[2] == '\x00' && sig[3] =='\x00') { |
+                 *signatureLength=4; |
+                 return "UTF-32LE"; |
+             } |
+             *signatureLength=2; |
+             return "UTF-16LE"; |
+         } |
+         break; |
+     case 0xEF: |
+         if(sig[1] == '\xBB' && sig[2] == '\xBF') { |
+             *signatureLength=3; |
+             return "UTF-8"; |
+         } |
+         break; |
+     case 0x00: |
+         if(sig[1] == '\x00' && sig[2] == '\xFE' && sig[3]=='\xFF') { |
+             *signatureLength=4; |
+             return "UTF-32BE"; |
+         } |
+         break; |
+     case 0x0E: |
+         if(sig[1] == '\xFE' && sig[2] == '\xFF') { |
+             *signatureLength=3; |
+             return "SCSU"; |
+         } |
+         break; |
+     case 0xFB: |
+         if(sig[1] == '\xEE' && sig[2] == '\x28') { |
+             *signatureLength=3; |
+             return "BOCU-1"; |
+         } |
+         break; |
+     case 0x2B: |
+         if(sig[1] == '\x2F' && sig[2] == '\x76') { |
+             /* |
+              * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ |
+              * depending on the second UTF-16 code unit. |
+              * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF |
+              * if it occurs. |
+              * |
+              * So far we have +/v |
+              */ |
+             if(sig[3] == '\x38' && sig[4] == '\x2D') { |
+                 /* 5 bytes +/v8- */ |
+                 *signatureLength=5; |
+                 return "UTF-7"; |
+             } |
+             if(sig[3] == '\x38' || sig[3] == '\x39' || sig[3] == '\x2B' || sig[3] == '\x2F') { |
+                 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ |
+                 *signatureLength=4; |
+                 return "UTF-7"; |
+             } |
+         } |
+         break; |
+     case 0xDD: |
+         if(sig[1]== '\x73' && sig[2]=='\x66' && sig[3]=='\x73') { |
+             *signatureLength=4; |
+             return "UTF-EBCDIC"; |
+         } |
+         break; |
+     default: |
+         break; |
+     } |
+ |
+     /* no known Unicode signature byte sequence recognized */ |
+     *signatureLength=0; |
+     return NULL; |
+ } |
+ |
+ /* |
+  * Returns the number of UChars that the converter holds internally for the |
+  * fromUnicode direction, computed from its preFromULength, preFromUFirstCP |
+  * and fromUChar32 fields. |
+  * |
+  * Returns -1 if status is NULL or already a failure, or if cnv is NULL |
+  * (then *status is set to U_ILLEGAL_ARGUMENT_ERROR). |
+  */ |
+ U_CAPI int32_t U_EXPORT2 |
+ ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) |
+ { |
+     if(status == NULL || U_FAILURE(*status)){ |
+         return -1; |
+     } |
+     if(cnv == NULL){ |
+         *status = U_ILLEGAL_ARGUMENT_ERROR; |
+         return -1; |
+     } |
+ |
+     if(cnv->preFromULength > 0){ |
+         /* first code point plus the additional stored units */ |
+         return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; |
+     }else if(cnv->preFromULength < 0){ |
+         /* a negative length: the stored count is its absolute value */ |
+         return -cnv->preFromULength ; |
+     }else if(cnv->fromUChar32 > 0){ |
+         /* NOTE(review): presumably a single pending lead unit - confirm */ |
+         return 1; |
+     }else if(cnv->preFromUFirstCP >= 0){ |
+         /* |
+          * Only the first code point is pending. |
+          * A negative value means "none" (ucnv_convertEx() tests preFromUFirstCP<0), |
+          * so U+0000 must count as pending too; hence >=0 rather than >0. |
+          */ |
+         return U16_LENGTH(cnv->preFromUFirstCP); |
+     } |
+     return 0; |
+ |
+ } |
+ |
+ /* |
+  * Returns the number of bytes that the converter holds internally for the |
+  * toUnicode direction: the absolute value of preToULength if it is non-zero, |
+  * otherwise toULength if positive, otherwise 0. |
+  * |
+  * Returns -1 if status is NULL or already a failure, or if cnv is NULL |
+  * (then *status is set to U_ILLEGAL_ARGUMENT_ERROR). |
+  */ |
+ U_CAPI int32_t U_EXPORT2 |
+ ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ |
+ |
+     if(status == NULL || U_FAILURE(*status)){ |
+         return -1; |
+     } |
+     if(cnv == NULL){ |
+         *status = U_ILLEGAL_ARGUMENT_ERROR; |
+         return -1; |
+     } |
+ |
+     if(cnv->preToULength != 0){ |
+         /* the sign only distinguishes the buffer's role; report the count */ |
+         if(cnv->preToULength > 0){ |
+             return cnv->preToULength; |
+         } |
+         return -cnv->preToULength; |
+     } |
+ |
+     if(cnv->toULength > 0){ |
+         return cnv->toULength; |
+     } |
+     return 0; |
+ } |
+#endif |
+ |
+/* |
+ * Hey, Emacs, please set the following: |
+ * |
+ * Local Variables: |
+ * indent-tabs-mode: nil |
+ * End: |
+ * |
+ */ |
Property changes on: icu46/source/common/ucnv.c |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |