icu46/source/common/ucnv.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/ucnv.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 ******************************************************************************

	3 *

	4 * Copyright (C) 1998-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 ******************************************************************************

	8 *

	9 * ucnv.c:

	10 * Implements APIs for the ICU's codeset conversion library;

	11 * mostly calls through internal functions;

	12 * created by Bertrand A. Damiba

	13 *

	14 * Modification History:

	15 *

	16 * Date Name Description

	17 * 04/04/99 helena Fixed internal header inclusion.

	18 * 05/09/00 helena Added implementation to handle fallback mappings.

	19 * 06/20/2000 helena OS/400 port changes; mostly typecast.

	20 */

	21

	22 #include "unicode/utypes.h"

	23

	24 #if !UCONFIG_NO_CONVERSION

	25

	26 #include "unicode/ustring.h"

	27 #include "unicode/ucnv.h"

	28 #include "unicode/ucnv_err.h"

	29 #include "unicode/uset.h"

	30 #include "putilimp.h"

	31 #include "cmemory.h"

	32 #include "cstring.h"

	33 #include "uassert.h"

	34 #include "utracimp.h"

	35 #include "ustr_imp.h"

	36 #include "ucnv_imp.h"

	37 #include "ucnv_cnv.h"

	38 #include "ucnv_bld.h"

	39

	40 /* size of intermediate and preflighting buffers in ucnv_convert() */

	41 #define CHUNK_SIZE 1024

	42

	43 typedef struct UAmbiguousConverter {

	44 const char *name;

	45 const UChar variant5c;

	46 } UAmbiguousConverter;

	47

	48 static const UAmbiguousConverter ambiguousConverters[]={

	49 { "ibm-897_P100-1995", 0xa5 },

	50 { "ibm-942_P120-1999", 0xa5 },

	51 { "ibm-943_P130-1999", 0xa5 },

	52 { "ibm-946_P100-1995", 0xa5 },

	53 { "ibm-33722_P120-1999", 0xa5 },

	54 { "ibm-1041_P100-1995", 0xa5 },

	55 /*{ "ibm-54191_P100-2006", 0xa5 },*/

	56 /*{ "ibm-62383_P100-2007", 0xa5 },*/

	57 /*{ "ibm-891_P100-1995", 0x20a9 },*/

	58 { "ibm-944_P100-1995", 0x20a9 },

	59 { "ibm-949_P110-1999", 0x20a9 },

	60 { "ibm-1363_P110-1997", 0x20a9 },

	61 { "ISO_2022,locale=ko,version=0", 0x20a9 },

	62 { "ibm-1088_P100-1995", 0x20a9 }

	63 };

	64

	65 /*Calls through createConverter */

	66 U_CAPI UConverter* U_EXPORT2

	67 ucnv_open (const char *name,

	68 UErrorCode * err)

	69 {

	70 UConverter *r;

	71

	72 if (err == NULL || U_FAILURE (*err)) {

	73 return NULL;

	74 }

	75

	76 r = ucnv_createConverter(NULL, name, err);

	77 return r;

	78 }

	79

	80 U_CAPI UConverter* U_EXPORT2

	81 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)

	82 {

	83 return ucnv_createConverterFromPackage(packageName, converterName, err);

	84 }

	85

	86 /*Extracts the UChar* to a char* and calls through createConverter */

	87 U_CAPI UConverter* U_EXPORT2

	88 ucnv_openU (const UChar * name,

	89 UErrorCode * err)

	90 {

	91 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];

	92

	93 if (err == NULL || U_FAILURE(*err))

	94 return NULL;

	95 if (name == NULL)

	96 return ucnv_open (NULL, err);

	97 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)

	98 {

	99 *err = U_ILLEGAL_ARGUMENT_ERROR;

	100 return NULL;

	101 }

	102 return ucnv_open(u_austrcpy(asciiName, name), err);

	103 }

	104

	105 /* Copy the string that is represented by the UConverterPlatform enum

	106 * @param platformString An output buffer

	107 * @param platform An enum representing a platform

	108 * @return the length of the copied string.

	109 */

	110 static int32_t

	111 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)

	112 {

	113 switch (pltfrm)

	114 {

	115 case UCNV_IBM:

	116 uprv_strcpy(platformString, "ibm-");

	117 return 4;

	118 case UCNV_UNKNOWN:

	119 break;

	120 }

	121

	122 /* default to empty string */

	123 *platformString = 0;

	124 return 0;

	125 }

	126

	127 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls

	128 *through createConverter*/

	129 U_CAPI UConverter* U_EXPORT2

	130 ucnv_openCCSID (int32_t codepage,

	131 UConverterPlatform platform,

	132 UErrorCode * err)

	133 {

	134 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];

	135 int32_t myNameLen;

	136

	137 if (err == NULL || U_FAILURE (*err))

	138 return NULL;

	139

	140 /* ucnv_copyPlatformString could return "ibm-" or "cp" */

	141 myNameLen = ucnv_copyPlatformString(myName, platform);

	142 T_CString_integerToString(myName + myNameLen, codepage, 10);

	143

	144 return ucnv_createConverter(NULL, myName, err);

	145 }

	146

	147 /* Creating a temporary stack-based object that can be used in one thread,

	148 and created from a converter that is shared across threads.

	149 */

	150

	151 U_CAPI UConverter* U_EXPORT2

	152 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)

	153 {

	154 UConverter *localConverter, *allocatedConverter;

	155 int32_t bufferSizeNeeded;

	156 char *stackBufferChars = (char *)stackBuffer;

	157 UErrorCode cbErr;

	158 UConverterToUnicodeArgs toUArgs = {

	159 sizeof(UConverterToUnicodeArgs),

	160 TRUE,

	161 NULL,

	162 NULL,

	163 NULL,

	164 NULL,

	165 NULL,

	166 NULL

	167 };

	168 UConverterFromUnicodeArgs fromUArgs = {

	169 sizeof(UConverterFromUnicodeArgs),

	170 TRUE,

	171 NULL,

	172 NULL,

	173 NULL,

	174 NULL,

	175 NULL,

	176 NULL

	177 };

	178

	179 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);

	180

	181 if (status == NULL || U_FAILURE(*status)){

	182 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);

	183 return 0;

	184 }

	185

	186 if (!pBufferSize || !cnv){

	187 *status = U_ILLEGAL_ARGUMENT_ERROR;

	188 UTRACE_EXIT_STATUS(*status);

	189 return 0;

	190 }

	191

	192 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",

	193 ucnv_getName(cnv, status), cnv, stackBuffer) ;

	194

	195 if (cnv->sharedData->impl->safeClone != NULL) {

	196 /* call the custom safeClone function for sizing */

	197 bufferSizeNeeded = 0;

	198 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);

	199 }

	200 else

	201 {

	202 /* inherent sizing */

	203 bufferSizeNeeded = sizeof(UConverter);

	204 }

	205

	206 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */

	207 *pBufferSize = bufferSizeNeeded;

	208 UTRACE_EXIT_VALUE(bufferSizeNeeded);

	209 return 0;

	210 }

	211

	212

	213 /* Pointers on 64-bit platforms need to be aligned

	214 * on a 64-bit boundary in memory.

	215 */

	216 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {

	217 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);

	218 if(*pBufferSize > offsetUp) {

	219 *pBufferSize -= offsetUp;

	220 stackBufferChars += offsetUp;

	221 } else {

	222 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */

	223 *pBufferSize = 1;

	224 }

	225 }

	226

	227 stackBuffer = (void *)stackBufferChars;

	228

	229 /* Now, see if we must allocate any memory */

	230 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)

	231 {

	232 /* allocate one here...*/

	233 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);

	234

	235 if(localConverter == NULL) {

	236 *status = U_MEMORY_ALLOCATION_ERROR;

	237 UTRACE_EXIT_STATUS(*status);

	238 return NULL;

	239 }

	240

	241 if (U_SUCCESS(*status)) {

	242 *status = U_SAFECLONE_ALLOCATED_WARNING;

	243 }

	244

	245 /* record the fact that memory was allocated */

	246 *pBufferSize = bufferSizeNeeded;

	247 } else {

	248 /* just use the stack buffer */

	249 localConverter = (UConverter*) stackBuffer;

	250 allocatedConverter = NULL;

	251 }

	252

	253 uprv_memset(localConverter, 0, bufferSizeNeeded);

	254

	255 /* Copy initial state */

	256 uprv_memcpy(localConverter, cnv, sizeof(UConverter));

	257 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;

	258

	259 /* copy the substitution string */

	260 if (cnv->subChars == (uint8_t *)cnv->subUChars) {

	261 localConverter->subChars = (uint8_t *)localConverter->subUChars;

	262 } else {

	263 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

	264 if (localConverter->subChars == NULL) {

	265 uprv_free(allocatedConverter);

	266 UTRACE_EXIT_STATUS(*status);

	267 return NULL;

	268 }

	269 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

	270 }

	271

	272 /* now either call the safeclone fcn or not */

	273 if (cnv->sharedData->impl->safeClone != NULL) {

	274 /* call the custom safeClone function */

	275 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);

	276 }

	277

	278 if(localConverter==NULL || U_FAILURE(*status)) {

	279 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {

	280 uprv_free(allocatedConverter->subChars);

	281 }

	282 uprv_free(allocatedConverter);

	283 UTRACE_EXIT_STATUS(*status);

	284 return NULL;

	285 }

	286

	287 /* increment refcount of shared data if needed */

	288 /*

	289 Checking whether it's an algorithic converter is okay

	290 in multithreaded applications because the value never changes.

	291 Don't check referenceCounter for any other value.

	292 */

	293 if (cnv->sharedData->referenceCounter != ~0) {

	294 ucnv_incrementRefCount(cnv->sharedData);

	295 }

	296

	297 if(localConverter == (UConverter*)stackBuffer) {

	298 /* we're using user provided data - set to not destroy */

	299 localConverter->isCopyLocal = TRUE;

	300 }

	301

	302 /* allow callback functions to handle any memory allocation */

	303 toUArgs.converter = fromUArgs.converter = localConverter;

	304 cbErr = U_ZERO_ERROR;

	305 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);

	306 cbErr = U_ZERO_ERROR;

	307 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);

	308

	309 UTRACE_EXIT_PTR_STATUS(localConverter, *status);

	310 return localConverter;

	311 }

	312

	313

	314

	315 /*Decreases the reference counter in the shared immutable section of the object

	316 *and frees the mutable part*/

	317

	318 U_CAPI void U_EXPORT2

	319 ucnv_close (UConverter * converter)

	320 {

	321 UErrorCode errorCode = U_ZERO_ERROR;

	322

	323 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);

	324

	325 if (converter == NULL)

	326 {

	327 UTRACE_EXIT();

	328 return;

	329 }

	330

	331 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",

	332 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);

	333

	334 /* In order to speed up the close, only call the callbacks when they have been changed.

	335 This performance check will only work when the callbacks are set within a shared library

	336 or from user code that statically links this code. */

	337 /* first, notify the callback functions that the converter is closed */

	338 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {

	339 UConverterToUnicodeArgs toUArgs = {

	340 sizeof(UConverterToUnicodeArgs),

	341 TRUE,

	342 NULL,

	343 NULL,

	344 NULL,

	345 NULL,

	346 NULL,

	347 NULL

	348 };

	349

	350 toUArgs.converter = converter;

	351 errorCode = U_ZERO_ERROR;

	352 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);

	353 }

	354 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {

	355 UConverterFromUnicodeArgs fromUArgs = {

	356 sizeof(UConverterFromUnicodeArgs),

	357 TRUE,

	358 NULL,

	359 NULL,

	360 NULL,

	361 NULL,

	362 NULL,

	363 NULL

	364 };

	365 fromUArgs.converter = converter;

	366 errorCode = U_ZERO_ERROR;

	367 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);

	368 }

	369

	370 if (converter->sharedData->impl->close != NULL) {

	371 converter->sharedData->impl->close(converter);

	372 }

	373

	374 if (converter->subChars != (uint8_t *)converter->subUChars) {

	375 uprv_free(converter->subChars);

	376 }

	377

	378 /*

	379 Checking whether it's an algorithic converter is okay

	380 in multithreaded applications because the value never changes.

	381 Don't check referenceCounter for any other value.

	382 */

	383 if (converter->sharedData->referenceCounter != ~0) {

	384 ucnv_unloadSharedDataIfReady(converter->sharedData);

	385 }

	386

	387 if(!converter->isCopyLocal){

	388 uprv_free(converter);

	389 }

	390

	391 UTRACE_EXIT();

	392 }

	393

	394 /*returns a single Name from the list, will return NULL if out of bounds

	395 */

	396 U_CAPI const char* U_EXPORT2

	397 ucnv_getAvailableName (int32_t n)

	398 {

	399 if (0 <= n && n <= 0xffff) {

	400 UErrorCode err = U_ZERO_ERROR;

	401 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);

	402 if (U_SUCCESS(err)) {

	403 return name;

	404 }

	405 }

	406 return NULL;

	407 }

	408

	409 U_CAPI int32_t U_EXPORT2

	410 ucnv_countAvailable ()

	411 {

	412 UErrorCode err = U_ZERO_ERROR;

	413 return ucnv_bld_countAvailableConverters(&err);

	414 }

	415

	416 U_CAPI void U_EXPORT2

	417 ucnv_getSubstChars (const UConverter * converter,

	418 char *mySubChar,

	419 int8_t * len,

	420 UErrorCode * err)

	421 {

	422 if (U_FAILURE (*err))

	423 return;

	424

	425 if (converter->subCharLen <= 0) {

	426 /* Unicode string or empty string from ucnv_setSubstString(). */

	427 *len = 0;

	428 return;

	429 }

	430

	431 if (*len < converter->subCharLen) /*not enough space in subChars */

	432 {

	433 *err = U_INDEX_OUTOFBOUNDS_ERROR;

	434 return;

	435 }

	436

	437 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */

	438 *len = converter->subCharLen; /*store # of bytes copied to buffer */

	439 }

	440

	441 U_CAPI void U_EXPORT2

	442 ucnv_setSubstChars (UConverter * converter,

	443 const char *mySubChar,

	444 int8_t len,

	445 UErrorCode * err)

	446 {

	447 if (U_FAILURE (*err))

	448 return;

	449

	450 /*Makes sure that the subChar is within the codepages char length boundaries */

	451 if ((len > converter->sharedData->staticData->maxBytesPerChar)

	452 || (len < converter->sharedData->staticData->minBytesPerChar))

	453 {

	454 *err = U_ILLEGAL_ARGUMENT_ERROR;

	455 return;

	456 }

	457

	458 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */

	459 converter->subCharLen = len; /*sets the new len */

	460

	461 /*

	462 * There is currently (2001Feb) no separate API to set/get subChar1.

	463 * In order to always have subChar written after it is explicitly set,

	464 * we set subChar1 to 0.

	465 */

	466 converter->subChar1 = 0;

	467

	468 return;

	469 }

	470

	471 U_CAPI void U_EXPORT2

	472 ucnv_setSubstString(UConverter *cnv,

	473 const UChar *s,

	474 int32_t length,

	475 UErrorCode *err) {

	476 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];

	477 char chars[UCNV_ERROR_BUFFER_LENGTH];

	478

	479 UConverter *clone;

	480 uint8_t *subChars;

	481 int32_t cloneSize, length8;

	482

	483 /* Let the following functions check all arguments. */

	484 cloneSize = sizeof(cloneBuffer);

	485 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);

	486 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);

	487 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);

	488 ucnv_close(clone);

	489 if (U_FAILURE(*err)) {

	490 return;

	491 }

	492

	493 if (cnv->sharedData->impl->writeSub == NULL

	494 #if !UCONFIG_NO_LEGACY_CONVERSION

	495 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&

	496 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)

	497 #endif

	498 ) {

	499 /* The converter is not stateful. Store the charset bytes as a fixed string. */

	500 subChars = (uint8_t *)chars;

	501 } else {

	502 /*

	503 * The converter has a non-default writeSub() function, indicating

	504 * that it is stateful.

	505 * Store the Unicode string for on-the-fly conversion for correct

	506 * state handling.

	507 */

	508 if (length > UCNV_ERROR_BUFFER_LENGTH) {

	509 /*

	510 * Should not occur. The converter should output at least one byte

	511 * per UChar, which means that ucnv_fromUChars() should catch all

	512 * overflows.

	513 */

	514 *err = U_BUFFER_OVERFLOW_ERROR;

	515 return;

	516 }

	517 subChars = (uint8_t *)s;

	518 if (length < 0) {

	519 length = u_strlen(s);

	520 }

	521 length8 = length * U_SIZEOF_UCHAR;

	522 }

	523

	524 /*

	525 * For storing the substitution string, select either the small buffer inside

	526 * UConverter or allocate a subChars buffer.

	527 */

	528 if (length8 > UCNV_MAX_SUBCHAR_LEN) {

	529 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */

	530 if (cnv->subChars == (uint8_t *)cnv->subUChars) {

	531 /* Allocate a new buffer for the string. */

	532 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

	533 if (cnv->subChars == NULL) {

	534 cnv->subChars = (uint8_t *)cnv->subUChars;

	535 *err = U_MEMORY_ALLOCATION_ERROR;

	536 return;

	537 }

	538 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);

	539 }

	540 }

	541

	542 /* Copy the substitution string into the UConverter or its subChars buffer. */

	543 if (length8 == 0) {

	544 cnv->subCharLen = 0;

	545 } else {

	546 uprv_memcpy(cnv->subChars, subChars, length8);

	547 if (subChars == (uint8_t *)chars) {

	548 cnv->subCharLen = (int8_t)length8;

	549 } else /* subChars == s */ {

	550 cnv->subCharLen = (int8_t)-length;

	551 }

	552 }

	553

	554 /* See comment in ucnv_setSubstChars(). */

	555 cnv->subChar1 = 0;

	556 }

	557

	558 /*resets the internal states of a converter

	559 *goal : have the same behaviour than a freshly created converter

	560 */

	561 static void _reset(UConverter *converter, UConverterResetChoice choice,

	562 UBool callCallback) {

	563 if(converter == NULL) {

	564 return;

	565 }

	566

	567 if(callCallback) {

	568 /* first, notify the callback functions that the converter is reset */

	569 UErrorCode errorCode;

	570

	571 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {

	572 UConverterToUnicodeArgs toUArgs = {

	573 sizeof(UConverterToUnicodeArgs),

	574 TRUE,

	575 NULL,

	576 NULL,

	577 NULL,

	578 NULL,

	579 NULL,

	580 NULL

	581 };

	582 toUArgs.converter = converter;

	583 errorCode = U_ZERO_ERROR;

	584 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);

	585 }

	586 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {

	587 UConverterFromUnicodeArgs fromUArgs = {

	588 sizeof(UConverterFromUnicodeArgs),

	589 TRUE,

	590 NULL,

	591 NULL,

	592 NULL,

	593 NULL,

	594 NULL,

	595 NULL

	596 };

	597 fromUArgs.converter = converter;

	598 errorCode = U_ZERO_ERROR;

	599 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);

	600 }

	601 }

	602

	603 /* now reset the converter itself */

	604 if(choice<=UCNV_RESET_TO_UNICODE) {

	605 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;

	606 converter->mode = 0;

	607 converter->toULength = 0;

	608 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;

	609 converter->preToULength = 0;

	610 }

	611 if(choice!=UCNV_RESET_TO_UNICODE) {

	612 converter->fromUnicodeStatus = 0;

	613 converter->fromUChar32 = 0;

	614 converter->invalidUCharLength = converter->charErrorBufferLength = 0;

	615 converter->preFromUFirstCP = U_SENTINEL;

	616 converter->preFromULength = 0;

	617 }

	618

	619 if (converter->sharedData->impl->reset != NULL) {

	620 /* call the custom reset function */

	621 converter->sharedData->impl->reset(converter, choice);

	622 }

	623 }

	624

	625 U_CAPI void U_EXPORT2

	626 ucnv_reset(UConverter *converter)

	627 {

	628 _reset(converter, UCNV_RESET_BOTH, TRUE);

	629 }

	630

	631 U_CAPI void U_EXPORT2

	632 ucnv_resetToUnicode(UConverter *converter)

	633 {

	634 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);

	635 }

	636

	637 U_CAPI void U_EXPORT2

	638 ucnv_resetFromUnicode(UConverter *converter)

	639 {

	640 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);

	641 }

	642

	643 U_CAPI int8_t U_EXPORT2

	644 ucnv_getMaxCharSize (const UConverter * converter)

	645 {

	646 return converter->maxBytesPerUChar;

	647 }

	648

	649

	650 U_CAPI int8_t U_EXPORT2

	651 ucnv_getMinCharSize (const UConverter * converter)

	652 {

	653 return converter->sharedData->staticData->minBytesPerChar;

	654 }

	655

	656 U_CAPI const char* U_EXPORT2

	657 ucnv_getName (const UConverter * converter, UErrorCode * err)

	658

	659 {

	660 if (U_FAILURE (*err))

	661 return NULL;

	662 if(converter->sharedData->impl->getName){

	663 const char* temp= converter->sharedData->impl->getName(converter);

	664 if(temp)

	665 return temp;

	666 }

	667 return converter->sharedData->staticData->name;

	668 }

	669

	670 U_CAPI int32_t U_EXPORT2

	671 ucnv_getCCSID(const UConverter * converter,

	672 UErrorCode * err)

	673 {

	674 int32_t ccsid;

	675 if (U_FAILURE (*err))

	676 return -1;

	677

	678 ccsid = converter->sharedData->staticData->codepage;

	679 if (ccsid == 0) {

	680 /* Rare case. This is for cases like gb18030,

	681 which doesn't have an IBM cannonical name, but does have an IBM alias. */

	682 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);

	683 if (U_SUCCESS(*err) && standardName) {

	684 const char *ccsidStr = uprv_strchr(standardName, '-');

	685 if (ccsidStr) {

	686 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */

	687 }

	688 }

	689 }

	690 return ccsid;

	691 }

	692

	693

	694 U_CAPI UConverterPlatform U_EXPORT2

	695 ucnv_getPlatform (const UConverter * converter,

	696 UErrorCode * err)

	697 {

	698 if (U_FAILURE (*err))

	699 return UCNV_UNKNOWN;

	700

	701 return (UConverterPlatform)converter->sharedData->staticData->platform;

	702 }

	703

	704 U_CAPI void U_EXPORT2

	705 ucnv_getToUCallBack (const UConverter * converter,

	706 UConverterToUCallback *action,

	707 const void **context)

	708 {

	709 *action = converter->fromCharErrorBehaviour;

	710 *context = converter->toUContext;

	711 }

	712

	713 U_CAPI void U_EXPORT2

	714 ucnv_getFromUCallBack (const UConverter * converter,

	715 UConverterFromUCallback *action,

	716 const void **context)

	717 {

	718 *action = converter->fromUCharErrorBehaviour;

	719 *context = converter->fromUContext;

	720 }

	721

	722 U_CAPI void U_EXPORT2

	723 ucnv_setToUCallBack (UConverter * converter,

	724 UConverterToUCallback newAction,

	725 const void* newContext,

	726 UConverterToUCallback *oldAction,

	727 const void** oldContext,

	728 UErrorCode * err)

	729 {

	730 if (U_FAILURE (*err))

	731 return;

	732 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;

	733 converter->fromCharErrorBehaviour = newAction;

	734 if (oldContext) *oldContext = converter->toUContext;

	735 converter->toUContext = newContext;

	736 }

	737

	738 U_CAPI void U_EXPORT2

	739 ucnv_setFromUCallBack (UConverter * converter,

	740 UConverterFromUCallback newAction,

	741 const void* newContext,

	742 UConverterFromUCallback *oldAction,

	743 const void** oldContext,

	744 UErrorCode * err)

	745 {

	746 if (U_FAILURE (*err))

	747 return;

	748 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;

	749 converter->fromUCharErrorBehaviour = newAction;

	750 if (oldContext) *oldContext = converter->fromUContext;

	751 converter->fromUContext = newContext;

	752 }

	753

	754 static void

	755 _updateOffsets(int32_t *offsets, int32_t length,

	756 int32_t sourceIndex, int32_t errorInputLength) {

	757 int32_t *limit;

	758 int32_t delta, offset;

	759

	760 if(sourceIndex>=0) {

	761 /*

	762 * adjust each offset by adding the previous sourceIndex

	763 * minus the length of the input sequence that caused an

	764 * error, if any

	765 */

	766 delta=sourceIndex-errorInputLength;

	767 } else {

	768 /*

	769 * set each offset to -1 because this conversion function

	770 * does not handle offsets

	771 */

	772 delta=-1;

	773 }

	774

	775 limit=offsets+length;

	776 if(delta==0) {

	777 /* most common case, nothing to do */

	778 } else if(delta>0) {

	779 /* add the delta to each offset (but not if the offset is <0) */

	780 while(offsets<limit) {

	781 offset=*offsets;

	782 if(offset>=0) {

	783 *offsets=offset+delta;

	784 }

	785 ++offsets;

	786 }

	787 } else /* delta<0 */ {

	788 /*

	789 * set each offset to -1 because this conversion function

	790 * does not handle offsets

	791 * or the error input sequence started in a previous buffer

	792 */

	793 while(offsets<limit) {

	794 *offsets++=-1;

	795 }

	796 }

	797 }

	798

	799 /* ucnv_fromUnicode --------------------------------------------------------- */

	800

	801 /*

	802 * Implementation note for m:n conversions

	803 *

	804 * While collecting source units to find the longest match for m:n conversion,

	805 * some source units may need to be stored for a partial match.

	806 * When a second buffer does not yield a match on all of the previously stored

	807 * source units, then they must be "replayed", i.e., fed back into the converter.

	808 *

	809 * The code relies on the fact that replaying will not nest -

	810 * converting a replay buffer will not result in a replay.

	811 * This is because a replay is necessary only after the _continuation_ of a

	812 * partial match failed, but a replay buffer is converted as a whole.

	813 * It may result in some of its units being stored again for a partial match,

	814 * but there will not be a continuation _during_ the replay which could fail.

	815 *

	816 * It is conceivable that a callback function could call the converter

	817 * recursively in a way that causes another replay to be stored, but that

	818 * would be an error in the callback function.

	819 * Such violations will cause assertion failures in a debug build,

	820 * and wrong output, but they will not cause a crash.

	821 */

	822

	823 static void

	824 _fromUnicodeWithCallback(UConverterFromUnicodeArgs pArgs, UErrorCode err) {

	825 UConverterFromUnicode fromUnicode;

	826 UConverter *cnv;

	827 const UChar *s;

	828 char *t;

	829 int32_t *offsets;

	830 int32_t sourceIndex;

	831 int32_t errorInputLength;

	832 UBool converterSawEndOfInput, calledCallback;

	833

	834 /* variables for m:n conversion */

	835 UChar replay[UCNV_EXT_MAX_UCHARS];

	836 const UChar realSource, realSourceLimit;

	837 int32_t realSourceIndex;

	838 UBool realFlush;

	839

	840 cnv=pArgs->converter;

	841 s=pArgs->source;

	842 t=pArgs->target;

	843 offsets=pArgs->offsets;

	844

	845 /* get the converter implementation function */

	846 sourceIndex=0;

	847 if(offsets==NULL) {

	848 fromUnicode=cnv->sharedData->impl->fromUnicode;

	849 } else {

	850 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;

	851 if(fromUnicode==NULL) {

	852 /* there is no WithOffsets implementation */

	853 fromUnicode=cnv->sharedData->impl->fromUnicode;

	854 /* we will write -1 for each offset */

	855 sourceIndex=-1;

	856 }

	857 }

	858

	859 if(cnv->preFromULength>=0) {

	860 /* normal mode */

	861 realSource=NULL;

	862

	863 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */

	864 realSourceLimit=NULL;

	865 realFlush=FALSE;

	866 realSourceIndex=0;

	867 } else {

	868 /*

	869 * Previous m:n conversion stored source units from a partial match

	870 * and failed to consume all of them.

	871 * We need to "replay" them from a temporary buffer and convert them fir st.

	872 */

	873 realSource=pArgs->source;

	874 realSourceLimit=pArgs->sourceLimit;

	875 realFlush=pArgs->flush;

	876 realSourceIndex=sourceIndex;

	877

	878 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);

	879 pArgs->source=replay;

	880 pArgs->sourceLimit=replay-cnv->preFromULength;

	881 pArgs->flush=FALSE;

	882 sourceIndex=-1;

	883

	884 cnv->preFromULength=0;

	885 }

	886

	887 /*

	888 * loop for conversion and error handling

	889 *

	890 * loop {

	891 * convert

	892 * loop {

	893 * update offsets

	894 * handle end of input

	895 * handle errors/call callback

	896 * }

	897 * }

	898 */

	899 for(;;) {

	900 if(U_SUCCESS(*err)) {

	901 /* convert */

	902 fromUnicode(pArgs, err);

	903

	904 /*

	905 * set a flag for whether the converter

	906 * successfully processed the end of the input

	907 *

	908 * need not check cnv->preFromULength==0 because a replay (<0) will cause

	909 * s<sourceLimit before converterSawEndOfInput is checked

	910 */

	911 converterSawEndOfInput=

	912 (UBool)(U_SUCCESS(*err) &&

	913 pArgs->flush && pArgs->source==pArgs->sourceLimit &&

	914 cnv->fromUChar32==0);

	915 } else {

	916 /* handle error from ucnv_convertEx() */

	917 converterSawEndOfInput=FALSE;

	918 }

	919

	920 /* no callback called yet for this iteration */

	921 calledCallback=FALSE;

	922

	923 /* no sourceIndex adjustment for conversion, only for callback output */

	924 errorInputLength=0;

	925

	926 /*

	927 * loop for offsets and error handling

	928 *

	929 * iterates at most 3 times:

	930 * 1. to clean up after the conversion function

	931 * 2. after the callback

	932 * 3. after the callback again if there was truncated input

	933 */

	934 for(;;) {

	935 /* update offsets if we write any */

	936 if(offsets!=NULL) {

	937 int32_t length=(int32_t)(pArgs->target-t);

	938 if(length>0) {

	939 _updateOffsets(offsets, length, sourceIndex, errorInputLengt h);

	940

	941 /*

	942 * if a converter handles offsets and updates the offsets

	943 * pointer at the end, then pArgs->offset should not change

	944 * here;

	945 * however, some converters do not handle offsets at all

	946 * (sourceIndex<0) or may not update the offsets pointer

	947 */

	948 pArgs->offsets=offsets+=length;

	949 }

	950

	951 if(sourceIndex>=0) {

	952 sourceIndex+=(int32_t)(pArgs->source-s);

	953 }

	954 }

	955

	956 if(cnv->preFromULength<0) {

	957 /*

	958 * switch the source to new replay units (cannot occur while rep laying)

	959 * after offset handling and before end-of-input and callback ha ndling

	960 */

	961 if(realSource==NULL) {

	962 realSource=pArgs->source;

	963 realSourceLimit=pArgs->sourceLimit;

	964 realFlush=pArgs->flush;

	965 realSourceIndex=sourceIndex;

	966

	967 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SI ZEOF_UCHAR);

	968 pArgs->source=replay;

	969 pArgs->sourceLimit=replay-cnv->preFromULength;

	970 pArgs->flush=FALSE;

	971 if((sourceIndex+=cnv->preFromULength)<0) {

	972 sourceIndex=-1;

	973 }

	974

	975 cnv->preFromULength=0;

	976 } else {

	977 /* see implementation note before _fromUnicodeWithCallback() */

	978 U_ASSERT(realSource==NULL);

	979 *err=U_INTERNAL_PROGRAM_ERROR;

	980 }

	981 }

	982

	983 /* update pointers */

	984 s=pArgs->source;

	985 t=pArgs->target;

	986

	987 if(U_SUCCESS(*err)) {

	988 if(s<pArgs->sourceLimit) {

	989 /*

	990 * continue with the conversion loop while there is still in put left

	991 * (continue converting by breaking out of only the inner lo op)

	992 */

	993 break;

	994 } else if(realSource!=NULL) {

	995 /* switch back from replaying to the real source and continu e */

	996 pArgs->source=realSource;

	997 pArgs->sourceLimit=realSourceLimit;

	998 pArgs->flush=realFlush;

	999 sourceIndex=realSourceIndex;

	1000

	1001 realSource=NULL;

	1002 break;

	1003 } else if(pArgs->flush && cnv->fromUChar32!=0) {

	1004 /*

	1005 * the entire input stream is consumed

	1006 * and there is a partial, truncated input sequence left

	1007 */

	1008

	1009 /* inject an error and continue with callback handling */

	1010 *err=U_TRUNCATED_CHAR_FOUND;

	1011 calledCallback=FALSE; /* new error condition */

	1012 } else {

	1013 /* input consumed */

	1014 if(pArgs->flush) {

	1015 /*

	1016 * return to the conversion loop once more if the flush

	1017 * flag is set and the conversion function has not

	1018 * successfully processed the end of the input yet

	1019 *

	1020 * (continue converting by breaking out of only the inne r loop)

	1021 */

	1022 if(!converterSawEndOfInput) {

	1023 break;

	1024 }

	1025

	1026 /* reset the converter without calling the callback func tion */

	1027 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);

	1028 }

	1029

	1030 /* done successfully */

	1031 return;

	1032 }

	1033 }

	1034

	1035 /* U_FAILURE(err) /

	1036 {

	1037 UErrorCode e;

	1038

	1039 if( calledCallback \|\|

	1040 (e=*err)==U_BUFFER_OVERFLOW_ERROR \|\|

	1041 (e!=U_INVALID_CHAR_FOUND &&

	1042 e!=U_ILLEGAL_CHAR_FOUND &&

	1043 e!=U_TRUNCATED_CHAR_FOUND)

	1044 ) {

	1045 /*

	1046 * the callback did not or cannot resolve the error:

	1047 * set output pointers and return

	1048 *

	1049 * the check for buffer overflow is redundant but it is

	1050 * a high-runner case and hopefully documents the intent

	1051 * well

	1052 *

	1053 * if we were replaying, then the replay buffer must be

	1054 * copied back into the UConverter

	1055 * and the real arguments must be restored

	1056 */

	1057 if(realSource!=NULL) {

	1058 int32_t length;

	1059

	1060 U_ASSERT(cnv->preFromULength==0);

	1061

	1062 length=(int32_t)(pArgs->sourceLimit-pArgs->source);

	1063 if(length>0) {

	1064 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_S IZEOF_UCHAR);

	1065 cnv->preFromULength=(int8_t)-length;

	1066 }

	1067

	1068 pArgs->source=realSource;

	1069 pArgs->sourceLimit=realSourceLimit;

	1070 pArgs->flush=realFlush;

	1071 }

	1072

	1073 return;

	1074 }

	1075 }

	1076

	1077 /* callback handling */

	1078 {

	1079 UChar32 codePoint;

	1080

	1081 /* get and write the code point */

	1082 codePoint=cnv->fromUChar32;

	1083 errorInputLength=0;

	1084 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, cod ePoint);

	1085 cnv->invalidUCharLength=(int8_t)errorInputLength;

	1086

	1087 /* set the converter state to deal with the next character */

	1088 cnv->fromUChar32=0;

	1089

	1090 /* call the callback function */

	1091 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,

	1092 cnv->invalidUCharBuffer, errorInputLength, codePoint,

	1093 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,

	1094 err);

	1095 }

	1096

	1097 /*

	1098 * loop back to the offset handling

	1099 *

	1100 * this flag will indicate after offset handling

	1101 * that a callback was called;

	1102 * if the callback did not resolve the error, then we return

	1103 */

	1104 calledCallback=TRUE;

	1105 }

	1106 }

	1107 }

	1108

	1109 /*

	1110 * Output the fromUnicode overflow buffer.

	1111 * Call this function if(cnv->charErrorBufferLength>0).

	1112 * @return TRUE if overflow

	1113 */

	1114 static UBool

	1115 ucnv_outputOverflowFromUnicode(UConverter *cnv,

	1116 char *target, const char targetLimit,

	1117 int32_t **pOffsets,

	1118 UErrorCode *err) {

	1119 int32_t *offsets;

	1120 char overflow, t;

	1121 int32_t i, length;

	1122

	1123 t=*target;

	1124 if(pOffsets!=NULL) {

	1125 offsets=*pOffsets;

	1126 } else {

	1127 offsets=NULL;

	1128 }

	1129

	1130 overflow=(char *)cnv->charErrorBuffer;

	1131 length=cnv->charErrorBufferLength;

	1132 i=0;

	1133 while(i<length) {

	1134 if(t==targetLimit) {

	1135 /* the overflow buffer contains too much, keep the rest */

	1136 int32_t j=0;

	1137

	1138 do {

	1139 overflow[j++]=overflow[i++];

	1140 } while(i<length);

	1141

	1142 cnv->charErrorBufferLength=(int8_t)j;

	1143 *target=t;

	1144 if(offsets!=NULL) {

	1145 *pOffsets=offsets;

	1146 }

	1147 *err=U_BUFFER_OVERFLOW_ERROR;

	1148 return TRUE;

	1149 }

	1150

	1151 /* copy the overflow contents to the target */

	1152 *t++=overflow[i++];

	1153 if(offsets!=NULL) {

	1154 offsets++=-1; / no source index available for old output */

	1155 }

	1156 }

	1157

	1158 /* the overflow buffer is completely copied to the target */

	1159 cnv->charErrorBufferLength=0;

	1160 *target=t;

	1161 if(offsets!=NULL) {

	1162 *pOffsets=offsets;

	1163 }

	1164 return FALSE;

	1165 }

	1166

	1167 U_CAPI void U_EXPORT2

	1168 ucnv_fromUnicode(UConverter *cnv,

	1169 char *target, const char targetLimit,

	1170 const UChar *source, const UChar sourceLimit,

	1171 int32_t *offsets,

	1172 UBool flush,

	1173 UErrorCode *err) {

	1174 UConverterFromUnicodeArgs args;

	1175 const UChar *s;

	1176 char *t;

	1177

	1178 /* check parameters */

	1179 if(err==NULL \|\| U_FAILURE(*err)) {

	1180 return;

	1181 }

	1182

	1183 if(cnv==NULL \|\| target==NULL \|\| source==NULL) {

	1184 *err=U_ILLEGAL_ARGUMENT_ERROR;

	1185 return;

	1186 }

	1187

	1188 s=*source;

	1189 t=*target;

	1190

	1191 if ((const void )U_MAX_PTR(sourceLimit) == (const void )sourceLimit) {

	1192 /*

	1193 Prevent code from going into an infinite loop in case we do hit this

	1194 limit. The limit pointer is expected to be on a UChar * boundary.

	1195 This also prevents the next argument check from failing.

	1196 */

	1197 sourceLimit = (const UChar )(((const char )sourceLimit) - 1);

	1198 }

	1199

	1200 /*

	1201 * All these conditions should never happen.

	1202 *

	1203 * 1) Make sure that the limits are >= to the address source or target

	1204 *

	1205 * 2) Make sure that the buffer sizes do not exceed the number range for

	1206 * int32_t because some functions use the size (in units or bytes)

	1207 * rather than comparing pointers, and because offsets are int32_t values.

	1208 *

	1209 * size_t is guaranteed to be unsigned and large enough for the job.

	1210 *

	1211 * Return with an error instead of adjusting the limits because we would

	1212 * not be able to maintain the semantics that either the source must be

	1213 * consumed or the target filled (unless an error occurs).

	1214 * An adjustment would be targetLimit=t+0x7fffffff; for example.

	1215 *

	1216 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer

	1217 * to a char * pointer and provide an incomplete UChar code unit.

	1218 */

	1219 if (sourceLimit<s \|\| targetLimit<t \|\|

	1220 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) \|\|

	1221 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) \|\|

	1222 (((const char )sourceLimit-(const char )s) & 1) != 0)

	1223 {

	1224 *err=U_ILLEGAL_ARGUMENT_ERROR;

	1225 return;

	1226 }

	1227

	1228 /* output the target overflow buffer */

	1229 if( cnv->charErrorBufferLength>0 &&

	1230 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)

	1231 ) {

	1232 /* U_BUFFER_OVERFLOW_ERROR */

	1233 return;

	1234 }

	1235 /* target may have moved, therefore stop using t /

	1236

	1237 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {

	1238 /* the overflow buffer is emptied and there is no new input: we are done */

	1239 return;

	1240 }

	1241

	1242 /*

	1243 * Do not simply return with a buffer overflow error if

	1244 * !flush && t==targetLimit

	1245 * because it is possible that the source will not generate any output.

	1246 * For example, the skip callback may be called;

	1247 * it does not output anything.

	1248 */

	1249

	1250 /* prepare the converter arguments */

	1251 args.converter=cnv;

	1252 args.flush=flush;

	1253 args.offsets=offsets;

	1254 args.source=s;

	1255 args.sourceLimit=sourceLimit;

	1256 args.target=*target;

	1257 args.targetLimit=targetLimit;

	1258 args.size=sizeof(args);

	1259

	1260 _fromUnicodeWithCallback(&args, err);

	1261

	1262 *source=args.source;

	1263 *target=args.target;

	1264 }

	1265

	1266 /* ucnv_toUnicode() --------------------------------------------------------- */

	1267

	1268 static void

	1269 _toUnicodeWithCallback(UConverterToUnicodeArgs pArgs, UErrorCode err) {

	1270 UConverterToUnicode toUnicode;

	1271 UConverter *cnv;

	1272 const char *s;

	1273 UChar *t;

	1274 int32_t *offsets;

	1275 int32_t sourceIndex;

	1276 int32_t errorInputLength;

	1277 UBool converterSawEndOfInput, calledCallback;

	1278

	1279 /* variables for m:n conversion */

	1280 char replay[UCNV_EXT_MAX_BYTES];

	1281 const char realSource, realSourceLimit;

	1282 int32_t realSourceIndex;

	1283 UBool realFlush;

	1284

	1285 cnv=pArgs->converter;

	1286 s=pArgs->source;

	1287 t=pArgs->target;

	1288 offsets=pArgs->offsets;

	1289

	1290 /* get the converter implementation function */

	1291 sourceIndex=0;

	1292 if(offsets==NULL) {

	1293 toUnicode=cnv->sharedData->impl->toUnicode;

	1294 } else {

	1295 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;

	1296 if(toUnicode==NULL) {

	1297 /* there is no WithOffsets implementation */

	1298 toUnicode=cnv->sharedData->impl->toUnicode;

	1299 /* we will write -1 for each offset */

	1300 sourceIndex=-1;

	1301 }

	1302 }

	1303

	1304 if(cnv->preToULength>=0) {

	1305 /* normal mode */

	1306 realSource=NULL;

	1307

	1308 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */

	1309 realSourceLimit=NULL;

	1310 realFlush=FALSE;

	1311 realSourceIndex=0;

	1312 } else {

	1313 /*

	1314 * Previous m:n conversion stored source units from a partial match

	1315 * and failed to consume all of them.

	1316 * We need to "replay" them from a temporary buffer and convert them fir st.

	1317 */

	1318 realSource=pArgs->source;

	1319 realSourceLimit=pArgs->sourceLimit;

	1320 realFlush=pArgs->flush;

	1321 realSourceIndex=sourceIndex;

	1322

	1323 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);

	1324 pArgs->source=replay;

	1325 pArgs->sourceLimit=replay-cnv->preToULength;

	1326 pArgs->flush=FALSE;

	1327 sourceIndex=-1;

	1328

	1329 cnv->preToULength=0;

	1330 }

	1331

	1332 /*

	1333 * loop for conversion and error handling

	1334 *

	1335 * loop {

	1336 * convert

	1337 * loop {

	1338 * update offsets

	1339 * handle end of input

	1340 * handle errors/call callback

	1341 * }

	1342 * }

	1343 */

	1344 for(;;) {

	1345 if(U_SUCCESS(*err)) {

	1346 /* convert */

	1347 toUnicode(pArgs, err);

	1348

	1349 /*

	1350 * set a flag for whether the converter

	1351 * successfully processed the end of the input

	1352 *

	1353 * need not check cnv->preToULength==0 because a replay (<0) will ca use

	1354 * s<sourceLimit before converterSawEndOfInput is checked

	1355 */

	1356 converterSawEndOfInput=

	1357 (UBool)(U_SUCCESS(*err) &&

	1358 pArgs->flush && pArgs->source==pArgs->sourceLimit &&

	1359 cnv->toULength==0);

	1360 } else {

	1361 /* handle error from getNextUChar() or ucnv_convertEx() */

	1362 converterSawEndOfInput=FALSE;

	1363 }

	1364

	1365 /* no callback called yet for this iteration */

	1366 calledCallback=FALSE;

	1367

	1368 /* no sourceIndex adjustment for conversion, only for callback output */

	1369 errorInputLength=0;

	1370

	1371 /*

	1372 * loop for offsets and error handling

	1373 *

	1374 * iterates at most 3 times:

	1375 * 1. to clean up after the conversion function

	1376 * 2. after the callback

	1377 * 3. after the callback again if there was truncated input

	1378 */

	1379 for(;;) {

	1380 /* update offsets if we write any */

	1381 if(offsets!=NULL) {

	1382 int32_t length=(int32_t)(pArgs->target-t);

	1383 if(length>0) {

	1384 _updateOffsets(offsets, length, sourceIndex, errorInputLengt h);

	1385

	1386 /*

	1387 * if a converter handles offsets and updates the offsets

	1388 * pointer at the end, then pArgs->offset should not change

	1389 * here;

	1390 * however, some converters do not handle offsets at all

	1391 * (sourceIndex<0) or may not update the offsets pointer

	1392 */

	1393 pArgs->offsets=offsets+=length;

	1394 }

	1395

	1396 if(sourceIndex>=0) {

	1397 sourceIndex+=(int32_t)(pArgs->source-s);

	1398 }

	1399 }

	1400

	1401 if(cnv->preToULength<0) {

	1402 /*

	1403 * switch the source to new replay units (cannot occur while rep laying)

	1404 * after offset handling and before end-of-input and callback ha ndling

	1405 */

	1406 if(realSource==NULL) {

	1407 realSource=pArgs->source;

	1408 realSourceLimit=pArgs->sourceLimit;

	1409 realFlush=pArgs->flush;

	1410 realSourceIndex=sourceIndex;

	1411

	1412 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);

	1413 pArgs->source=replay;

	1414 pArgs->sourceLimit=replay-cnv->preToULength;

	1415 pArgs->flush=FALSE;

	1416 if((sourceIndex+=cnv->preToULength)<0) {

	1417 sourceIndex=-1;

	1418 }

	1419

	1420 cnv->preToULength=0;

	1421 } else {

	1422 /* see implementation note before _fromUnicodeWithCallback() */

	1423 U_ASSERT(realSource==NULL);

	1424 *err=U_INTERNAL_PROGRAM_ERROR;

	1425 }

	1426 }

	1427

	1428 /* update pointers */

	1429 s=pArgs->source;

	1430 t=pArgs->target;

	1431

	1432 if(U_SUCCESS(*err)) {

	1433 if(s<pArgs->sourceLimit) {

	1434 /*

	1435 * continue with the conversion loop while there is still in put left

	1436 * (continue converting by breaking out of only the inner lo op)

	1437 */

	1438 break;

	1439 } else if(realSource!=NULL) {

	1440 /* switch back from replaying to the real source and continu e */

	1441 pArgs->source=realSource;

	1442 pArgs->sourceLimit=realSourceLimit;

	1443 pArgs->flush=realFlush;

	1444 sourceIndex=realSourceIndex;

	1445

	1446 realSource=NULL;

	1447 break;

	1448 } else if(pArgs->flush && cnv->toULength>0) {

	1449 /*

	1450 * the entire input stream is consumed

	1451 * and there is a partial, truncated input sequence left

	1452 */

	1453

	1454 /* inject an error and continue with callback handling */

	1455 *err=U_TRUNCATED_CHAR_FOUND;

	1456 calledCallback=FALSE; /* new error condition */

	1457 } else {

	1458 /* input consumed */

	1459 if(pArgs->flush) {

	1460 /*

	1461 * return to the conversion loop once more if the flush

	1462 * flag is set and the conversion function has not

	1463 * successfully processed the end of the input yet

	1464 *

	1465 * (continue converting by breaking out of only the inne r loop)

	1466 */

	1467 if(!converterSawEndOfInput) {

	1468 break;

	1469 }

	1470

	1471 /* reset the converter without calling the callback func tion */

	1472 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);

	1473 }

	1474

	1475 /* done successfully */

	1476 return;

	1477 }

	1478 }

	1479

	1480 /* U_FAILURE(err) /

	1481 {

	1482 UErrorCode e;

	1483

	1484 if( calledCallback \|\|

	1485 (e=*err)==U_BUFFER_OVERFLOW_ERROR \|\|

	1486 (e!=U_INVALID_CHAR_FOUND &&

	1487 e!=U_ILLEGAL_CHAR_FOUND &&

	1488 e!=U_TRUNCATED_CHAR_FOUND &&

	1489 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&

	1490 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)

	1491 ) {

	1492 /*

	1493 * the callback did not or cannot resolve the error:

	1494 * set output pointers and return

	1495 *

	1496 * the check for buffer overflow is redundant but it is

	1497 * a high-runner case and hopefully documents the intent

	1498 * well

	1499 *

	1500 * if we were replaying, then the replay buffer must be

	1501 * copied back into the UConverter

	1502 * and the real arguments must be restored

	1503 */

	1504 if(realSource!=NULL) {

	1505 int32_t length;

	1506

	1507 U_ASSERT(cnv->preToULength==0);

	1508

	1509 length=(int32_t)(pArgs->sourceLimit-pArgs->source);

	1510 if(length>0) {

	1511 uprv_memcpy(cnv->preToU, pArgs->source, length);

	1512 cnv->preToULength=(int8_t)-length;

	1513 }

	1514

	1515 pArgs->source=realSource;

	1516 pArgs->sourceLimit=realSourceLimit;

	1517 pArgs->flush=realFlush;

	1518 }

	1519

	1520 return;

	1521 }

	1522 }

	1523

	1524 /* copy toUBytes[] to invalidCharBuffer[] */

	1525 errorInputLength=cnv->invalidCharLength=cnv->toULength;

	1526 if(errorInputLength>0) {

	1527 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLen gth);

	1528 }

	1529

	1530 /* set the converter state to deal with the next character */

	1531 cnv->toULength=0;

	1532

	1533 /* call the callback function */

	1534 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUN D) {

	1535 cnv->toUCallbackReason = UCNV_UNASSIGNED;

	1536 }

	1537 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,

	1538 cnv->invalidCharBuffer, errorInputLength,

	1539 cnv->toUCallbackReason,

	1540 err);

	1541 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */

	1542

	1543 /*

	1544 * loop back to the offset handling

	1545 *

	1546 * this flag will indicate after offset handling

	1547 * that a callback was called;

	1548 * if the callback did not resolve the error, then we return

	1549 */

	1550 calledCallback=TRUE;

	1551 }

	1552 }

	1553 }

	1554

	1555 /*

	1556 * Output the toUnicode overflow buffer.

	1557 * Call this function if(cnv->UCharErrorBufferLength>0).

	1558 * @return TRUE if overflow

	1559 */

	1560 static UBool

	1561 ucnv_outputOverflowToUnicode(UConverter *cnv,

	1562 UChar *target, const UChar targetLimit,

	1563 int32_t **pOffsets,

	1564 UErrorCode *err) {

	1565 int32_t *offsets;

	1566 UChar overflow, t;

	1567 int32_t i, length;

	1568

	1569 t=*target;

	1570 if(pOffsets!=NULL) {

	1571 offsets=*pOffsets;

	1572 } else {

	1573 offsets=NULL;

	1574 }

	1575

	1576 overflow=cnv->UCharErrorBuffer;

	1577 length=cnv->UCharErrorBufferLength;

	1578 i=0;

	1579 while(i<length) {

	1580 if(t==targetLimit) {

	1581 /* the overflow buffer contains too much, keep the rest */

	1582 int32_t j=0;

	1583

	1584 do {

	1585 overflow[j++]=overflow[i++];

	1586 } while(i<length);

	1587

	1588 cnv->UCharErrorBufferLength=(int8_t)j;

	1589 *target=t;

	1590 if(offsets!=NULL) {

	1591 *pOffsets=offsets;

	1592 }

	1593 *err=U_BUFFER_OVERFLOW_ERROR;

	1594 return TRUE;

	1595 }

	1596

	1597 /* copy the overflow contents to the target */

	1598 *t++=overflow[i++];

	1599 if(offsets!=NULL) {

	1600 offsets++=-1; / no source index available for old output */

	1601 }

	1602 }

	1603

	1604 /* the overflow buffer is completely copied to the target */

	1605 cnv->UCharErrorBufferLength=0;

	1606 *target=t;

	1607 if(offsets!=NULL) {

	1608 *pOffsets=offsets;

	1609 }

	1610 return FALSE;

	1611 }

	1612

	1613 U_CAPI void U_EXPORT2

	1614 ucnv_toUnicode(UConverter *cnv,

	1615 UChar *target, const UChar targetLimit,

	1616 const char *source, const char sourceLimit,

	1617 int32_t *offsets,

	1618 UBool flush,

	1619 UErrorCode *err) {

	1620 UConverterToUnicodeArgs args;

	1621 const char *s;

	1622 UChar *t;

	1623

	1624 /* check parameters */

	1625 if(err==NULL \|\| U_FAILURE(*err)) {

	1626 return;

	1627 }

	1628

	1629 if(cnv==NULL \|\| target==NULL \|\| source==NULL) {

	1630 *err=U_ILLEGAL_ARGUMENT_ERROR;

	1631 return;

	1632 }

	1633

	1634 s=*source;

	1635 t=*target;

	1636

	1637 if ((const void )U_MAX_PTR(targetLimit) == (const void )targetLimit) {

	1638 /*

	1639 Prevent code from going into an infinite loop in case we do hit this

	1640 limit. The limit pointer is expected to be on a UChar * boundary.

	1641 This also prevents the next argument check from failing.

	1642 */

	1643 targetLimit = (const UChar )(((const char )targetLimit) - 1);

	1644 }

	1645

	1646 /*

	1647 * All these conditions should never happen.

	1648 *

	1649 * 1) Make sure that the limits are >= to the address source or target

	1650 *

	1651 * 2) Make sure that the buffer sizes do not exceed the number range for

	1652 * int32_t because some functions use the size (in units or bytes)

	1653 * rather than comparing pointers, and because offsets are int32_t values.

	1654 *

	1655 * size_t is guaranteed to be unsigned and large enough for the job.

	1656 *

	1657 * Return with an error instead of adjusting the limits because we would

	1658 * not be able to maintain the semantics that either the source must be

	1659 * consumed or the target filled (unless an error occurs).

	1660 * An adjustment would be sourceLimit=t+0x7fffffff; for example.

	1661 *

	1662 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer

	1663 * to a char * pointer and provide an incomplete UChar code unit.

	1664 */

	1665 if (sourceLimit<s \|\| targetLimit<t \|\|

	1666 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) \|\|

	1667 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) \|\|

	1668 (((const char )targetLimit-(const char )t) & 1) != 0

	1669 ) {

	1670 *err=U_ILLEGAL_ARGUMENT_ERROR;

	1671 return;

	1672 }

	1673

	1674 /* output the target overflow buffer */

	1675 if( cnv->UCharErrorBufferLength>0 &&

	1676 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)

	1677 ) {

	1678 /* U_BUFFER_OVERFLOW_ERROR */

	1679 return;

	1680 }

	1681 /* target may have moved, therefore stop using t /

	1682

	1683 if(!flush && s==sourceLimit && cnv->preToULength>=0) {

	1684 /* the overflow buffer is emptied and there is no new input: we are done */

	1685 return;

	1686 }

	1687

	1688 /*

	1689 * Do not simply return with a buffer overflow error if

	1690 * !flush && t==targetLimit

	1691 * because it is possible that the source will not generate any output.

	1692 * For example, the skip callback may be called;

	1693 * it does not output anything.

	1694 */

	1695

	1696 /* prepare the converter arguments */

	1697 args.converter=cnv;

	1698 args.flush=flush;

	1699 args.offsets=offsets;

	1700 args.source=s;

	1701 args.sourceLimit=sourceLimit;

	1702 args.target=*target;

	1703 args.targetLimit=targetLimit;

	1704 args.size=sizeof(args);

	1705

	1706 _toUnicodeWithCallback(&args, err);

	1707

	1708 *source=args.source;

	1709 *target=args.target;

	1710 }

	1711

	1712 /* ucnv_to/fromUChars() ----------------------------------------------------- */

	1713

	1714 U_CAPI int32_t U_EXPORT2

	1715 ucnv_fromUChars(UConverter *cnv,

	1716 char *dest, int32_t destCapacity,

	1717 const UChar *src, int32_t srcLength,

	1718 UErrorCode *pErrorCode) {

	1719 const UChar *srcLimit;

	1720 char originalDest, destLimit;

	1721 int32_t destLength;

	1722

	1723 /* check arguments */

	1724 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	1725 return 0;

	1726 }

	1727

	1728 if( cnv==NULL \|\|

	1729 destCapacity<0 \|\| (destCapacity>0 && dest==NULL) \|\|

	1730 srcLength<-1 \|\| (srcLength!=0 && src==NULL)

	1731 ) {

	1732 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	1733 return 0;

	1734 }

	1735

	1736 /* initialize */

	1737 ucnv_resetFromUnicode(cnv);

	1738 originalDest=dest;

	1739 if(srcLength==-1) {

	1740 srcLength=u_strlen(src);

	1741 }

	1742 if(srcLength>0) {

	1743 srcLimit=src+srcLength;

	1744 destLimit=dest+destCapacity;

	1745

	1746 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */

	1747 if(destLimit<dest \|\| (destLimit==NULL && dest!=NULL)) {

	1748 destLimit=(char *)U_MAX_PTR(dest);

	1749 }

	1750

	1751 /* perform the conversion */

	1752 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorC ode);

	1753 destLength=(int32_t)(dest-originalDest);

	1754

	1755 /* if an overflow occurs, then get the preflighting length */

	1756 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

	1757 char buffer[1024];

	1758

	1759 destLimit=buffer+sizeof(buffer);

	1760 do {

	1761 dest=buffer;

	1762 *pErrorCode=U_ZERO_ERROR;

	1763 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);

	1764 destLength+=(int32_t)(dest-buffer);

	1765 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);

	1766 }

	1767 } else {

	1768 destLength=0;

	1769 }

	1770

	1771 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);

	1772 }

	1773

	1774 U_CAPI int32_t U_EXPORT2

	1775 ucnv_toUChars(UConverter *cnv,

	1776 UChar *dest, int32_t destCapacity,

	1777 const char *src, int32_t srcLength,

	1778 UErrorCode *pErrorCode) {

	1779 const char *srcLimit;

	1780 UChar originalDest, destLimit;

	1781 int32_t destLength;

	1782

	1783 /* check arguments */

	1784 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	1785 return 0;

	1786 }

	1787

	1788 if( cnv==NULL \|\|

	1789 destCapacity<0 \|\| (destCapacity>0 && dest==NULL) \|\|

	1790 srcLength<-1 \|\| (srcLength!=0 && src==NULL))

	1791 {

	1792 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	1793 return 0;

	1794 }

	1795

	1796 /* initialize */

	1797 ucnv_resetToUnicode(cnv);

	1798 originalDest=dest;

	1799 if(srcLength==-1) {

	1800 srcLength=(int32_t)uprv_strlen(src);

	1801 }

	1802 if(srcLength>0) {

	1803 srcLimit=src+srcLength;

	1804 destLimit=dest+destCapacity;

	1805

	1806 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */

	1807 if(destLimit<dest \|\| (destLimit==NULL && dest!=NULL)) {

	1808 destLimit=(UChar *)U_MAX_PTR(dest);

	1809 }

	1810

	1811 /* perform the conversion */

	1812 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCod e);

	1813 destLength=(int32_t)(dest-originalDest);

	1814

	1815 /* if an overflow occurs, then get the preflighting length */

	1816 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)

	1817 {

	1818 UChar buffer[1024];

	1819

	1820 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;

	1821 do {

	1822 dest=buffer;

	1823 *pErrorCode=U_ZERO_ERROR;

	1824 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, p ErrorCode);

	1825 destLength+=(int32_t)(dest-buffer);

	1826 }

	1827 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);

	1828 }

	1829 } else {

	1830 destLength=0;

	1831 }

	1832

	1833 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode) ;

	1834 }

	1835

	1836 /* ucnv_getNextUChar() ------------------------------------------------------ */

	1837

	1838 U_CAPI UChar32 U_EXPORT2

	1839 ucnv_getNextUChar(UConverter *cnv,

	1840 const char *source, const char sourceLimit,

	1841 UErrorCode *err) {

	1842 UConverterToUnicodeArgs args;

	1843 UChar buffer[U16_MAX_LENGTH];

	1844 const char *s;

	1845 UChar32 c;

	1846 int32_t i, length;

	1847

	1848 /* check parameters */

	1849 if(err==NULL \|\| U_FAILURE(*err)) {

	1850 return 0xffff;

	1851 }

	1852

	1853 if(cnv==NULL \|\| source==NULL) {

	1854 *err=U_ILLEGAL_ARGUMENT_ERROR;

	1855 return 0xffff;

	1856 }

	1857

	1858 s=*source;

	1859 if(sourceLimit<s) {

	1860 *err=U_ILLEGAL_ARGUMENT_ERROR;

	1861 return 0xffff;

	1862 }

	1863

	1864 /*

	1865 * Make sure that the buffer sizes do not exceed the number range for

	1866 * int32_t because some functions use the size (in units or bytes)

	1867 * rather than comparing pointers, and because offsets are int32_t values.

	1868 *

	1869 * size_t is guaranteed to be unsigned and large enough for the job.

	1870 *

	1871 * Return with an error instead of adjusting the limits because we would

	1872 * not be able to maintain the semantics that either the source must be

	1873 * consumed or the target filled (unless an error occurs).

	1874 * An adjustment would be sourceLimit=t+0x7fffffff; for example.

	1875 */

	1876 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {

	1877 *err=U_ILLEGAL_ARGUMENT_ERROR;

	1878 return 0xffff;

	1879 }

	1880

	1881 c=U_SENTINEL;

	1882

	1883 /* flush the target overflow buffer */

	1884 if(cnv->UCharErrorBufferLength>0) {

	1885 UChar *overflow;

	1886

	1887 overflow=cnv->UCharErrorBuffer;

	1888 i=0;

	1889 length=cnv->UCharErrorBufferLength;

	1890 U16_NEXT(overflow, i, length, c);

	1891

	1892 /* move the remaining overflow contents up to the beginning */

	1893 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {

	1894 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,

	1895 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);

	1896 }

	1897

	1898 if(!U16_IS_LEAD(c) \|\| i<length) {

	1899 return c;

	1900 }

	1901 /*

	1902 * Continue if the overflow buffer contained only a lead surrogate,

	1903 * in case the converter outputs single surrogates from complete

	1904 * input sequences.

	1905 */

	1906 }

	1907

	1908 /*

	1909 * flush==TRUE is implied for ucnv_getNextUChar()

	1910 *

	1911 * do not simply return even if s==sourceLimit because the converter may

	1912 * not have seen flush==TRUE before

	1913 */

	1914

	1915 /* prepare the converter arguments */

	1916 args.converter=cnv;

	1917 args.flush=TRUE;

	1918 args.offsets=NULL;

	1919 args.source=s;

	1920 args.sourceLimit=sourceLimit;

	1921 args.target=buffer;

	1922 args.targetLimit=buffer+1;

	1923 args.size=sizeof(args);

	1924

	1925 if(c<0) {

	1926 /*

	1927 * call the native getNextUChar() implementation if we are

	1928 * at a character boundary (toULength==0)

	1929 *

	1930 * unlike with _toUnicode(), getNextUChar() implementations must set

	1931 * U_TRUNCATED_CHAR_FOUND for truncated input,

	1932 * in addition to setting toULength/toUBytes[]

	1933 */

	1934 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {

	1935 c=cnv->sharedData->impl->getNextUChar(&args, err);

	1936 *source=s=args.source;

	1937 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {

	1938 /* reset the converter without calling the callback function */

	1939 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);

	1940 return 0xffff; /* no output */

	1941 } else if(U_SUCCESS(*err) && c>=0) {

	1942 return c;

	1943 /*

	1944 * else fall through to use _toUnicode() because

	1945 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all

	1946 * U_FAILURE: call _toUnicode() for callback handling (do not outp ut c)

	1947 */

	1948 }

	1949 }

	1950

	1951 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */

	1952 _toUnicodeWithCallback(&args, err);

	1953

	1954 if(*err==U_BUFFER_OVERFLOW_ERROR) {

	1955 *err=U_ZERO_ERROR;

	1956 }

	1957

	1958 i=0;

	1959 length=(int32_t)(args.target-buffer);

	1960 } else {

	1961 /* write the lead surrogate from the overflow buffer */

	1962 buffer[0]=(UChar)c;

	1963 args.target=buffer+1;

	1964 i=0;

	1965 length=1;

	1966 }

	1967

	1968 /* buffer contents starts at i and ends before length */

	1969

	1970 if(U_FAILURE(*err)) {

	1971 c=0xffff; /* no output */

	1972 } else if(length==0) {

	1973 /* no input or only state changes */

	1974 *err=U_INDEX_OUTOFBOUNDS_ERROR;

	1975 /* no need to reset explicitly because _toUnicodeWithCallback() did it * /

	1976 c=0xffff; /* no output */

	1977 } else {

	1978 c=buffer[0];

	1979 i=1;

	1980 if(!U16_IS_LEAD(c)) {

	1981 /* consume c=buffer[0], done */

	1982 } else {

	1983 /* got a lead surrogate, see if a trail surrogate follows */

	1984 UChar c2;

	1985

	1986 if(cnv->UCharErrorBufferLength>0) {

	1987 /* got overflow output from the conversion */

	1988 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {

	1989 /* got a trail surrogate, too */

	1990 c=U16_GET_SUPPLEMENTARY(c, c2);

	1991

	1992 /* move the remaining overflow contents up to the beginning */

	1993 if((--cnv->UCharErrorBufferLength)>0) {

	1994 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffe r+1,

	1995 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR) ;

	1996 }

	1997 } else {

	1998 /* c is an unpaired lead surrogate, just return it */

	1999 }

	2000 } else if(args.source<sourceLimit) {

	2001 /* convert once more, to buffer[1] */

	2002 args.targetLimit=buffer+2;

	2003 _toUnicodeWithCallback(&args, err);

	2004 if(*err==U_BUFFER_OVERFLOW_ERROR) {

	2005 *err=U_ZERO_ERROR;

	2006 }

	2007

	2008 length=(int32_t)(args.target-buffer);

	2009 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {

	2010 /* got a trail surrogate, too */

	2011 c=U16_GET_SUPPLEMENTARY(c, c2);

	2012 i=2;

	2013 }

	2014 }

	2015 }

	2016 }

	2017

	2018 /*

	2019 * move leftover output from buffer[i..length[

	2020 * into the beginning of the overflow buffer

	2021 */

	2022 if(i<length) {

	2023 /* move further overflow back */

	2024 int32_t delta=length-i;

	2025 if((length=cnv->UCharErrorBufferLength)>0) {

	2026 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,

	2027 length*U_SIZEOF_UCHAR);

	2028 }

	2029 cnv->UCharErrorBufferLength=(int8_t)(length+delta);

	2030

	2031 cnv->UCharErrorBuffer[0]=buffer[i++];

	2032 if(delta>1) {

	2033 cnv->UCharErrorBuffer[1]=buffer[i];

	2034 }

	2035 }

	2036

	2037 *source=args.source;

	2038 return c;

	2039 }

	2040

	2041 /* ucnv_convert() and siblings ---------------------------------------------- */

	2042

	2043 U_CAPI void U_EXPORT2

	2044 ucnv_convertEx(UConverter targetCnv, UConverter sourceCnv,

	2045 char *target, const char targetLimit,

	2046 const char *source, const char sourceLimit,

	2047 UChar pivotStart, UChar *pivotSource,

	2048 UChar *pivotTarget, const UChar pivotLimit,

	2049 UBool reset, UBool flush,

	2050 UErrorCode *pErrorCode) {

	2051 UChar pivotBuffer[CHUNK_SIZE];

	2052 const UChar *myPivotSource;

	2053 UChar *myPivotTarget;

	2054 const char *s;

	2055 char *t;

	2056

	2057 UConverterToUnicodeArgs toUArgs;

	2058 UConverterFromUnicodeArgs fromUArgs;

	2059 UConverterConvert convert;

	2060

	2061 /* error checking */

	2062 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	2063 return;

	2064 }

	2065

	2066 if( targetCnv==NULL \|\| sourceCnv==NULL \|\|

	2067 source==NULL \|\| *source==NULL \|\|

	2068 target==NULL \|\| *target==NULL \|\| targetLimit==NULL

	2069 ) {

	2070 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2071 return;

	2072 }

	2073

	2074 s=*source;

	2075 t=*target;

	2076 if((sourceLimit!=NULL && sourceLimit<s) \|\| targetLimit<t) {

	2077 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2078 return;

	2079 }

	2080

	2081 /*

	2082 * Make sure that the buffer sizes do not exceed the number range for

	2083 * int32_t. See ucnv_toUnicode() for a more detailed comment.

	2084 */

	2085 if(

	2086 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sou rceLimit>s)) \|\|

	2087 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)

	2088 ) {

	2089 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2090 return;

	2091 }

	2092

	2093 if(pivotStart==NULL) {

	2094 if(!flush) {

	2095 /* streaming conversion requires an explicit pivot buffer */

	2096 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2097 return;

	2098 }

	2099

	2100 /* use the stack pivot buffer */

	2101 myPivotSource=myPivotTarget=pivotStart=pivotBuffer;

	2102 pivotSource=(UChar **)&myPivotSource;

	2103 pivotTarget=&myPivotTarget;

	2104 pivotLimit=pivotBuffer+CHUNK_SIZE;

	2105 } else if( pivotStart>=pivotLimit \|\|

	2106 pivotSource==NULL \|\| *pivotSource==NULL \|\|

	2107 pivotTarget==NULL \|\| *pivotTarget==NULL \|\|

	2108 pivotLimit==NULL

	2109 ) {

	2110 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2111 return;

	2112 }

	2113

	2114 if(sourceLimit==NULL) {

	2115 /* get limit of single-byte-NUL-terminated source string */

	2116 sourceLimit=uprv_strchr(*source, 0);

	2117 }

	2118

	2119 if(reset) {

	2120 ucnv_resetToUnicode(sourceCnv);

	2121 ucnv_resetFromUnicode(targetCnv);

	2122 pivotSource=pivotTarget=pivotStart;

	2123 } else if(targetCnv->charErrorBufferLength>0) {

	2124 /* output the targetCnv overflow buffer */

	2125 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {

	2126 /* U_BUFFER_OVERFLOW_ERROR */

	2127 return;

	2128 }

	2129 /* target has moved, therefore stop using t /

	2130

	2131 if( !flush &&

	2132 targetCnv->preFromULength>=0 && pivotSource==pivotTarget &&

	2133 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 & & s==sourceLimit

	2134 ) {

	2135 /* the fromUnicode overflow buffer is emptied and there is no new in put: we are done */

	2136 return;

	2137 }

	2138 }

	2139

	2140 /* Is direct-UTF-8 conversion available? */

	2141 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&

	2142 targetCnv->sharedData->impl->fromUTF8!=NULL

	2143 ) {

	2144 convert=targetCnv->sharedData->impl->fromUTF8;

	2145 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&

	2146 sourceCnv->sharedData->impl->toUTF8!=NULL

	2147 ) {

	2148 convert=sourceCnv->sharedData->impl->toUTF8;

	2149 } else {

	2150 convert=NULL;

	2151 }

	2152

	2153 /*

	2154 * If direct-UTF-8 conversion is available, then we use a smaller

	2155 * pivot buffer for error handling and partial matches

	2156 * so that we quickly return to direct conversion.

	2157 *

	2158 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.

	2159 *

	2160 * We could reduce the pivot buffer size further, at the cost of

	2161 * buffer overflows from callbacks.

	2162 * The pivot buffer should not be smaller than the maximum number of

	2163 * fromUnicode extension table input UChars

	2164 * (for m:n conversion, see

	2165 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])

	2166 * or 2 for surrogate pairs.

	2167 *

	2168 * Too small a buffer can cause thrashing between pivoting and direct

	2169 * conversion, with function call overhead outweighing the benefits

	2170 * of direct conversion.

	2171 */

	2172 if(convert!=NULL && (pivotLimit-pivotStart)>32) {

	2173 pivotLimit=pivotStart+32;

	2174 }

	2175

	2176 /* prepare the converter arguments */

	2177 fromUArgs.converter=targetCnv;

	2178 fromUArgs.flush=FALSE;

	2179 fromUArgs.offsets=NULL;

	2180 fromUArgs.target=*target;

	2181 fromUArgs.targetLimit=targetLimit;

	2182 fromUArgs.size=sizeof(fromUArgs);

	2183

	2184 toUArgs.converter=sourceCnv;

	2185 toUArgs.flush=flush;

	2186 toUArgs.offsets=NULL;

	2187 toUArgs.source=s;

	2188 toUArgs.sourceLimit=sourceLimit;

	2189 toUArgs.targetLimit=pivotLimit;

	2190 toUArgs.size=sizeof(toUArgs);

	2191

	2192 /*

	2193 * TODO: Consider separating this function into two functions,

	2194 * extracting exactly the conversion loop,

	2195 * for readability and to reduce the set of visible variables.

	2196 *

	2197 * Otherwise stop using s and t from here on.

	2198 */

	2199 s=t=NULL;

	2200

	2201 /*

	2202 * conversion loop

	2203 *

	2204 * The sequence of steps in the loop may appear backward,

	2205 * but the principle is simple:

	2206 * In the chain of

	2207 * source - sourceCnv overflow - pivot - targetCnv overflow - target

	2208 * empty out later buffers before refilling them from earlier ones.

	2209 *

	2210 * The targetCnv overflow buffer is flushed out only once before the loop.

	2211 */

	2212 for(;;) {

	2213 /*

	2214 * if(pivot not empty or error or replay or flush fromUnicode) {

	2215 * fromUnicode(pivot -> target);

	2216 * }

	2217 *

	2218 * For pivoting conversion; and for direct conversion for

	2219 * error callback handling and flushing the replay buffer.

	2220 */

	2221 if( pivotSource<pivotTarget \|\|

	2222 U_FAILURE(*pErrorCode) \|\|

	2223 targetCnv->preFromULength<0 \|\|

	2224 fromUArgs.flush

	2225 ) {

	2226 fromUArgs.source=*pivotSource;

	2227 fromUArgs.sourceLimit=*pivotTarget;

	2228 _fromUnicodeWithCallback(&fromUArgs, pErrorCode);

	2229 if(U_FAILURE(*pErrorCode)) {

	2230 /* target overflow, or conversion error */

	2231 pivotSource=(UChar )fromUArgs.source;

	2232 break;

	2233 }

	2234

	2235 /*

	2236 * _fromUnicodeWithCallback() must have consumed the pivot contents

	2237 * (pivotSource==pivotTarget) since it returned with U_SUCCESS()

	2238 */

	2239 }

	2240

	2241 /* The pivot buffer is empty; reset it so we start at pivotStart. */

	2242 pivotSource=pivotTarget=pivotStart;

	2243

	2244 /*

	2245 * if(sourceCnv overflow buffer not empty) {

	2246 * move(sourceCnv overflow buffer -> pivot);

	2247 * continue;

	2248 * }

	2249 */

	2250 /* output the sourceCnv overflow buffer */

	2251 if(sourceCnv->UCharErrorBufferLength>0) {

	2252 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {

	2253 /* U_BUFFER_OVERFLOW_ERROR */

	2254 *pErrorCode=U_ZERO_ERROR;

	2255 }

	2256 continue;

	2257 }

	2258

	2259 /*

	2260 * check for end of input and break if done

	2261 *

	2262 * Checking both flush and fromUArgs.flush ensures that the converters

	2263 * have been called with the flush flag set if the ucnv_convertEx()

	2264 * caller set it.

	2265 */

	2266 if( toUArgs.source==sourceLimit &&

	2267 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&

	2268 (!flush \|\| fromUArgs.flush)

	2269 ) {

	2270 /* done successfully */

	2271 break;

	2272 }

	2273

	2274 /*

	2275 * use direct conversion if available

	2276 * but not if continuing a partial match

	2277 * or flushing the toUnicode replay buffer

	2278 */

	2279 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULen gth==0) {

	2280 if(*pErrorCode==U_USING_DEFAULT_WARNING) {

	2281 /* remove a warning that may be set by this function */

	2282 *pErrorCode=U_ZERO_ERROR;

	2283 }

	2284 convert(&fromUArgs, &toUArgs, pErrorCode);

	2285 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

	2286 break;

	2287 } else if(U_FAILURE(*pErrorCode)) {

	2288 if(sourceCnv->toULength>0) {

	2289 /*

	2290 * Fall through to calling _toUnicodeWithCallback()

	2291 * for callback handling.

	2292 *

	2293 * The pivot buffer will be reset with

	2294 * pivotSource=pivotTarget=pivotStart;

	2295 * which indicates a toUnicode error to the caller

	2296 * (*pivotSource==pivotStart shows no pivot UChars consumed) .

	2297 */

	2298 } else {

	2299 /*

	2300 * Indicate a fromUnicode error to the caller

	2301 * (*pivotSource>pivotStart shows some pivot UChars consumed ).

	2302 */

	2303 pivotSource=pivotTarget=pivotStart+1;

	2304 /*

	2305 * Loop around to calling _fromUnicodeWithCallbacks()

	2306 * for callback handling.

	2307 */

	2308 continue;

	2309 }

	2310 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {

	2311 /*

	2312 * No error, but the implementation requested to temporarily

	2313 * fall back to pivoting.

	2314 */

	2315 *pErrorCode=U_ZERO_ERROR;

	2316 /*

	2317 * The following else branches are almost identical to the end-of-in put

	2318 * handling in _toUnicodeWithCallback().

	2319 * Avoid calling it just for the end of input.

	2320 */

	2321 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */

	2322 /*

	2323 * the entire input stream is consumed

	2324 * and there is a partial, truncated input sequence left

	2325 */

	2326

	2327 /* inject an error and continue with callback handling */

	2328 *pErrorCode=U_TRUNCATED_CHAR_FOUND;

	2329 } else {

	2330 /* input consumed */

	2331 if(flush) {

	2332 /* reset the converters without calling the callback functio ns */

	2333 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);

	2334 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);

	2335 }

	2336

	2337 /* done successfully */

	2338 break;

	2339 }

	2340 }

	2341

	2342 /*

	2343 * toUnicode(source -> pivot);

	2344 *

	2345 * For pivoting conversion; and for direct conversion for

	2346 * error callback handling, continuing partial matches

	2347 * and flushing the replay buffer.

	2348 *

	2349 * The pivot buffer is empty and reset.

	2350 */

	2351 toUArgs.target=pivotStart; /* ==pivotTarget /

	2352 /* toUArgs.targetLimit=pivotLimit; already set before the loop */

	2353 _toUnicodeWithCallback(&toUArgs, pErrorCode);

	2354 *pivotTarget=toUArgs.target;

	2355 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {

	2356 /* pivot overflow: continue with the conversion loop */

	2357 *pErrorCode=U_ZERO_ERROR;

	2358 } else if(U_FAILURE(pErrorCode) \|\| (!flush && pivotTarget==pivotStart) ) {

	2359 /* conversion error, or there was nothing left to convert */

	2360 break;

	2361 }

	2362 /*

	2363 * else:

	2364 * _toUnicodeWithCallback() wrote into the pivot buffer,

	2365 * continue with fromUnicode conversion.

	2366 *

	2367 * Set the fromUnicode flush flag if we flush and if toUnicode has

	2368 * processed the end of the input.

	2369 */

	2370 if( flush && toUArgs.source==sourceLimit &&

	2371 sourceCnv->preToULength>=0 &&

	2372 sourceCnv->UCharErrorBufferLength==0

	2373 ) {

	2374 fromUArgs.flush=TRUE;

	2375 }

	2376 }

	2377

	2378 /*

	2379 * The conversion loop is exited when one of the following is true:

	2380 * - the entire source text has been converted successfully to the target bu ffer

	2381 * - a target buffer overflow occurred

	2382 * - a conversion error occurred

	2383 */

	2384

	2385 *source=toUArgs.source;

	2386 *target=fromUArgs.target;

	2387

	2388 /* terminate the target buffer if possible */

	2389 if(flush && U_SUCCESS(*pErrorCode)) {

	2390 if(*target!=targetLimit) {

	2391 **target=0;

	2392 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {

	2393 *pErrorCode=U_ZERO_ERROR;

	2394 }

	2395 } else {

	2396 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;

	2397 }

	2398 }

	2399 }

	2400

	2401 /* internal implementation of ucnv_convert() etc. with preflighting */

	2402 static int32_t

	2403 ucnv_internalConvert(UConverter outConverter, UConverter inConverter,

	2404 char *target, int32_t targetCapacity,

	2405 const char *source, int32_t sourceLength,

	2406 UErrorCode *pErrorCode) {

	2407 UChar pivotBuffer[CHUNK_SIZE];

	2408 UChar pivot, pivot2;

	2409

	2410 char *myTarget;

	2411 const char *sourceLimit;

	2412 const char *targetLimit;

	2413 int32_t targetLength=0;

	2414

	2415 /* set up */

	2416 if(sourceLength<0) {

	2417 sourceLimit=uprv_strchr(source, 0);

	2418 } else {

	2419 sourceLimit=source+sourceLength;

	2420 }

	2421

	2422 /* if there is no input data, we're done */

	2423 if(source==sourceLimit) {

	2424 return u_terminateChars(target, targetCapacity, 0, pErrorCode);

	2425 }

	2426

	2427 pivot=pivot2=pivotBuffer;

	2428 myTarget=target;

	2429 targetLength=0;

	2430

	2431 if(targetCapacity>0) {

	2432 /* perform real conversion */

	2433 targetLimit=target+targetCapacity;

	2434 ucnv_convertEx(outConverter, inConverter,

	2435 &myTarget, targetLimit,

	2436 &source, sourceLimit,

	2437 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,

	2438 FALSE,

	2439 TRUE,

	2440 pErrorCode);

	2441 targetLength=(int32_t)(myTarget-target);

	2442 }

	2443

	2444 /*

	2445 * If the output buffer is exhausted (or we are only "preflighting"), we nee d to stop writing

	2446 * to it but continue the conversion in order to store in targetCapacity

	2447 * the number of bytes that was required.

	2448 */

	2449 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR \|\| targetCapacity==0)

	2450 {

	2451 char targetBuffer[CHUNK_SIZE];

	2452

	2453 targetLimit=targetBuffer+CHUNK_SIZE;

	2454 do {

	2455 *pErrorCode=U_ZERO_ERROR;

	2456 myTarget=targetBuffer;

	2457 ucnv_convertEx(outConverter, inConverter,

	2458 &myTarget, targetLimit,

	2459 &source, sourceLimit,

	2460 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,

	2461 FALSE,

	2462 TRUE,

	2463 pErrorCode);

	2464 targetLength+=(int32_t)(myTarget-targetBuffer);

	2465 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);

	2466

	2467 /* done with preflighting, set warnings and errors as appropriate */

	2468 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode );

	2469 }

	2470

	2471 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */

	2472 return targetLength;

	2473 }

	2474

	2475 U_CAPI int32_t U_EXPORT2

	2476 ucnv_convert(const char toConverterName, const char fromConverterName,

	2477 char *target, int32_t targetCapacity,

	2478 const char *source, int32_t sourceLength,

	2479 UErrorCode *pErrorCode) {

	2480 UConverter in, out; /* stack-allocated */

	2481 UConverter inConverter, outConverter;

	2482 int32_t targetLength;

	2483

	2484 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	2485 return 0;

	2486 }

	2487

	2488 if( source==NULL \|\| sourceLength<-1 \|\|

	2489 targetCapacity<0 \|\| (targetCapacity>0 && target==NULL)

	2490 ) {

	2491 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2492 return 0;

	2493 }

	2494

	2495 /* if there is no input data, we're done */

	2496 if(sourceLength==0 \|\| (sourceLength<0 && *source==0)) {

	2497 return u_terminateChars(target, targetCapacity, 0, pErrorCode);

	2498 }

	2499

	2500 /* create the converters */

	2501 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);

	2502 if(U_FAILURE(*pErrorCode)) {

	2503 return 0;

	2504 }

	2505

	2506 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);

	2507 if(U_FAILURE(*pErrorCode)) {

	2508 ucnv_close(inConverter);

	2509 return 0;

	2510 }

	2511

	2512 targetLength=ucnv_internalConvert(outConverter, inConverter,

	2513 target, targetCapacity,

	2514 source, sourceLength,

	2515 pErrorCode);

	2516

	2517 ucnv_close(inConverter);

	2518 ucnv_close(outConverter);

	2519

	2520 return targetLength;

	2521 }

	2522

	2523 /* @internal */

	2524 static int32_t

	2525 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,

	2526 UConverterType algorithmicType,

	2527 UConverter *cnv,

	2528 char *target, int32_t targetCapacity,

	2529 const char *source, int32_t sourceLength,

	2530 UErrorCode *pErrorCode) {

	2531 UConverter algoConverterStatic; /* stack-allocated */

	2532 UConverter algoConverter, to, *from;

	2533 int32_t targetLength;

	2534

	2535 if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {

	2536 return 0;

	2537 }

	2538

	2539 if( cnv==NULL \|\| source==NULL \|\| sourceLength<-1 \|\|

	2540 targetCapacity<0 \|\| (targetCapacity>0 && target==NULL)

	2541 ) {

	2542 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	2543 return 0;

	2544 }

	2545

	2546 /* if there is no input data, we're done */

	2547 if(sourceLength==0 \|\| (sourceLength<0 && *source==0)) {

	2548 return u_terminateChars(target, targetCapacity, 0, pErrorCode);

	2549 }

	2550

	2551 /* create the algorithmic converter */

	2552 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorith micType,

	2553 "", 0, pErrorCode);

	2554 if(U_FAILURE(*pErrorCode)) {

	2555 return 0;

	2556 }

	2557

	2558 /* reset the other converter */

	2559 if(convertToAlgorithmic) {

	2560 /* cnv->Unicode->algo */

	2561 ucnv_resetToUnicode(cnv);

	2562 to=algoConverter;

	2563 from=cnv;

	2564 } else {

	2565 /* algo->Unicode->cnv */

	2566 ucnv_resetFromUnicode(cnv);

	2567 from=algoConverter;

	2568 to=cnv;

	2569 }

	2570

	2571 targetLength=ucnv_internalConvert(to, from,

	2572 target, targetCapacity,

	2573 source, sourceLength,

	2574 pErrorCode);

	2575

	2576 ucnv_close(algoConverter);

	2577

	2578 return targetLength;

	2579 }

	2580

	2581 U_CAPI int32_t U_EXPORT2

	2582 ucnv_toAlgorithmic(UConverterType algorithmicType,

	2583 UConverter *cnv,

	2584 char *target, int32_t targetCapacity,

	2585 const char *source, int32_t sourceLength,

	2586 UErrorCode *pErrorCode) {

	2587 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,

	2588 target, targetCapacity,

	2589 source, sourceLength,

	2590 pErrorCode);

	2591 }

	2592

	2593 U_CAPI int32_t U_EXPORT2

	2594 ucnv_fromAlgorithmic(UConverter *cnv,

	2595 UConverterType algorithmicType,

	2596 char *target, int32_t targetCapacity,

	2597 const char *source, int32_t sourceLength,

	2598 UErrorCode *pErrorCode) {

	2599 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,

	2600 target, targetCapacity,

	2601 source, sourceLength,

	2602 pErrorCode);

	2603 }

	2604

	2605 U_CAPI UConverterType U_EXPORT2

	2606 ucnv_getType(const UConverter* converter)

	2607 {

	2608 int8_t type = converter->sharedData->staticData->conversionType;

	2609 #if !UCONFIG_NO_LEGACY_CONVERSION

	2610 if(type == UCNV_MBCS) {

	2611 return ucnv_MBCSGetType(converter);

	2612 }

	2613 #endif

	2614 return (UConverterType)type;

	2615 }

	2616

	2617 U_CAPI void U_EXPORT2

	2618 ucnv_getStarters(const UConverter* converter,

	2619 UBool starters[256],

	2620 UErrorCode* err)

	2621 {

	2622 if (err == NULL \|\| U_FAILURE(*err)) {

	2623 return;

	2624 }

	2625

	2626 if(converter->sharedData->impl->getStarters != NULL) {

	2627 converter->sharedData->impl->getStarters(converter, starters, err);

	2628 } else {

	2629 *err = U_ILLEGAL_ARGUMENT_ERROR;

	2630 }

	2631 }

	2632

	2633 static const UAmbiguousConverter ucnv_getAmbiguous(const UConverter cnv)

	2634 {

	2635 UErrorCode errorCode;

	2636 const char *name;

	2637 int32_t i;

	2638

	2639 if(cnv==NULL) {

	2640 return NULL;

	2641 }

	2642

	2643 errorCode=U_ZERO_ERROR;

	2644 name=ucnv_getName(cnv, &errorCode);

	2645 if(U_FAILURE(errorCode)) {

	2646 return NULL;

	2647 }

	2648

	2649 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter) ); ++i)

	2650 {

	2651 if(0==uprv_strcmp(name, ambiguousConverters[i].name))

	2652 {

	2653 return ambiguousConverters+i;

	2654 }

	2655 }

	2656

	2657 return NULL;

	2658 }

	2659

	2660 U_CAPI void U_EXPORT2

	2661 ucnv_fixFileSeparator(const UConverter *cnv,

	2662 UChar* source,

	2663 int32_t sourceLength) {

	2664 const UAmbiguousConverter *a;

	2665 int32_t i;

	2666 UChar variant5c;

	2667

	2668 if(cnv==NULL \|\| source==NULL \|\| sourceLength<=0 \|\| (a=ucnv_getAmbiguous(cnv) )==NULL)

	2669 {

	2670 return;

	2671 }

	2672

	2673 variant5c=a->variant5c;

	2674 for(i=0; i<sourceLength; ++i) {

	2675 if(source[i]==variant5c) {

	2676 source[i]=0x5c;

	2677 }

	2678 }

	2679 }

	2680

	2681 U_CAPI UBool U_EXPORT2

	2682 ucnv_isAmbiguous(const UConverter *cnv) {

	2683 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);

	2684 }

	2685

	2686 U_CAPI void U_EXPORT2

	2687 ucnv_setFallback(UConverter *cnv, UBool usesFallback)

	2688 {

	2689 cnv->useFallback = usesFallback;

	2690 }

	2691

	2692 U_CAPI UBool U_EXPORT2

	2693 ucnv_usesFallback(const UConverter *cnv)

	2694 {

	2695 return cnv->useFallback;

	2696 }

	2697

	2698 U_CAPI void U_EXPORT2

	2699 ucnv_getInvalidChars (const UConverter * converter,

	2700 char *errBytes,

	2701 int8_t * len,

	2702 UErrorCode * err)

	2703 {

	2704 if (err == NULL \|\| U_FAILURE(*err))

	2705 {

	2706 return;

	2707 }

	2708 if (len == NULL \|\| errBytes == NULL \|\| converter == NULL)

	2709 {

	2710 *err = U_ILLEGAL_ARGUMENT_ERROR;

	2711 return;

	2712 }

	2713 if (*len < converter->invalidCharLength)

	2714 {

	2715 *err = U_INDEX_OUTOFBOUNDS_ERROR;

	2716 return;

	2717 }

	2718 if ((*len = converter->invalidCharLength) > 0)

	2719 {

	2720 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);

	2721 }

	2722 }

	2723

	2724 U_CAPI void U_EXPORT2

	2725 ucnv_getInvalidUChars (const UConverter * converter,

	2726 UChar *errChars,

	2727 int8_t * len,

	2728 UErrorCode * err)

	2729 {

	2730 if (err == NULL \|\| U_FAILURE(*err))

	2731 {

	2732 return;

	2733 }

	2734 if (len == NULL \|\| errChars == NULL \|\| converter == NULL)

	2735 {

	2736 *err = U_ILLEGAL_ARGUMENT_ERROR;

	2737 return;

	2738 }

	2739 if (*len < converter->invalidUCharLength)

	2740 {

	2741 *err = U_INDEX_OUTOFBOUNDS_ERROR;

	2742 return;

	2743 }

	2744 if ((*len = converter->invalidUCharLength) > 0)

	2745 {

	2746 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (* len));

	2747 }

	2748 }

	2749

	2750 #define SIG_MAX_LEN 5

	2751

	2752 U_CAPI const char* U_EXPORT2

	2753 ucnv_detectUnicodeSignature( const char* source,

	2754 int32_t sourceLength,

	2755 int32_t* signatureLength,

	2756 UErrorCode* pErrorCode) {

	2757 int32_t dummy;

	2758

	2759 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN

	2760 * bytes we don't misdetect something

	2761 */

	2762 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };

	2763 int i = 0;

	2764

	2765 if((pErrorCode==NULL) \|\| U_FAILURE(*pErrorCode)){

	2766 return NULL;

	2767 }

	2768

	2769 if(source == NULL \|\| sourceLength < -1){

	2770 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;

	2771 return NULL;

	2772 }

	2773

	2774 if(signatureLength == NULL) {

	2775 signatureLength = &dummy;

	2776 }

	2777

	2778 if(sourceLength==-1){

	2779 sourceLength=(int32_t)uprv_strlen(source);

	2780 }

	2781

	2782

	2783 while(i<sourceLength&& i<SIG_MAX_LEN){

	2784 start[i]=source[i];

	2785 i++;

	2786 }

	2787

	2788 if(start[0] == '\xFE' && start[1] == '\xFF') {

	2789 *signatureLength=2;

	2790 return "UTF-16BE";

	2791 } else if(start[0] == '\xFF' && start[1] == '\xFE') {

	2792 if(start[2] == '\x00' && start[3] =='\x00') {

	2793 *signatureLength=4;

	2794 return "UTF-32LE";

	2795 } else {

	2796 *signatureLength=2;

	2797 return "UTF-16LE";

	2798 }

	2799 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {

	2800 *signatureLength=3;

	2801 return "UTF-8";

	2802 } else if(start[0] == '\x00' && start[1] == '\x00' &&

	2803 start[2] == '\xFE' && start[3]=='\xFF') {

	2804 *signatureLength=4;

	2805 return "UTF-32BE";

	2806 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {

	2807 *signatureLength=3;

	2808 return "SCSU";

	2809 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {

	2810 *signatureLength=3;

	2811 return "BOCU-1";

	2812 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {

	2813 /*

	2814 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v /

	2815 * depending on the second UTF-16 code unit.

	2816 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF

	2817 * if it occurs.

	2818 *

	2819 * So far we have +/v

	2820 */

	2821 if(start[3] == '\x38' && start[4] == '\x2D') {

	2822 /* 5 bytes +/v8- */

	2823 *signatureLength=5;

	2824 return "UTF-7";

	2825 } else if(start[3] == '\x38' \|\| start[3] == '\x39' \|\| start[3] == '\x2B' \|\| start[3] == '\x2F') {

	2826 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */

	2827 *signatureLength=4;

	2828 return "UTF-7";

	2829 }

	2830 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3 ]=='\x73'){

	2831 *signatureLength=4;

	2832 return "UTF-EBCDIC";

	2833 }

	2834

	2835

	2836 /* no known Unicode signature byte sequence recognized */

	2837 *signatureLength=0;

	2838 return NULL;

	2839 }

	2840

	2841 U_CAPI int32_t U_EXPORT2

	2842 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)

	2843 {

	2844 if(status == NULL \|\| U_FAILURE(*status)){

	2845 return -1;

	2846 }

	2847 if(cnv == NULL){

	2848 *status = U_ILLEGAL_ARGUMENT_ERROR;

	2849 return -1;

	2850 }

	2851

	2852 if(cnv->preFromULength > 0){

	2853 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;

	2854 }else if(cnv->preFromULength < 0){

	2855 return -cnv->preFromULength ;

	2856 }else if(cnv->fromUChar32 > 0){

	2857 return 1;

	2858 }else if(cnv->preFromUFirstCP >0){

	2859 return U16_LENGTH(cnv->preFromUFirstCP);

	2860 }

	2861 return 0;

	2862

	2863 }

	2864

	2865 U_CAPI int32_t U_EXPORT2

	2866 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){

	2867

	2868 if(status == NULL \|\| U_FAILURE(*status)){

	2869 return -1;

	2870 }

	2871 if(cnv == NULL){

	2872 *status = U_ILLEGAL_ARGUMENT_ERROR;

	2873 return -1;

	2874 }

	2875

	2876 if(cnv->preToULength > 0){

	2877 return cnv->preToULength ;

	2878 }else if(cnv->preToULength < 0){

	2879 return -cnv->preToULength;

	2880 }else if(cnv->toULength > 0){

	2881 return cnv->toULength;

	2882 }

	2883 return 0;

	2884 }

	2885 #endif

	2886

	2887 /*

	2888 * Hey, Emacs, please set the following:

	2889 *

	2890 * Local Variables:

	2891 * indent-tabs-mode: nil

	2892 * End:

	2893 *

	2894 */

OLD	NEW

« no previous file with comments | « icu46/source/common/ucmndata.c ('k') | icu46/source/common/ucnv2022.c » ('j') | no next file with comments »