icu46/source/common/unistr_cnv.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/common/unistr_cnv.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/common/unistr_cnv.cpp

===================================================================

--- icu46/source/common/unistr_cnv.cpp (revision 0)

+++ icu46/source/common/unistr_cnv.cpp (revision 0)

@@ -0,0 +1,425 @@

+/*

+*******************************************************************************

+* file name: unistr_cnv.cpp

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:2

+* created on: 2004aug19

+* created by: Markus W. Scherer

+* Character conversion functions moved here from unistr.cpp

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_CONVERSION

+#include "unicode/putil.h"

+#include "cstring.h"

+#include "cmemory.h"

+#include "unicode/ustring.h"

+#include "unicode/unistr.h"

+#include "unicode/ucnv.h"

+#include "ucnv_imp.h"

+#include "putilimp.h"

+#include "ustr_cnv.h"

+#include "ustr_imp.h"

+U_NAMESPACE_BEGIN

+//========================================

+// Constructors

+//========================================

+#if !U_CHARSET_IS_UTF8

+UnicodeString::UnicodeString(const char *codepageData)  // Constructs the string from NUL-terminated bytes; passes 0 as the codepage, which doCodepageCreate() treats as "use the default".

+ : fShortLength(0),  // NOTE(review): the function's own brace-only lines are not present in this diff rendering

+ fFlags(kShortString)

+ if(codepageData != 0) {  // a null input pointer leaves the freshly initialized string untouched

+ doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);  // byte count is measured with uprv_strlen()

+ }

+UnicodeString::UnicodeString(const char *codepageData,  // Constructs the string from dataLength bytes; passes 0 as the codepage, which doCodepageCreate() treats as "use the default".

+ int32_t dataLength)  // forwarded unchanged; doCodepageCreate() ignores 0 and values below -1, and measures the input itself for -1

+ : fShortLength(0),  // NOTE(review): the function's own brace-only lines are not present in this diff rendering

+ fFlags(kShortString)

+ if(codepageData != 0) {  // a null input pointer leaves the freshly initialized string untouched

+ doCodepageCreate(codepageData, dataLength, 0);

+ }

+// else see unistr.cpp

+#endif

+UnicodeString::UnicodeString(const char *codepageData,  // Constructs the string from NUL-terminated bytes in the named codepage.

+ const char *codepage)  // forwarded to doCodepageCreate(): 0 selects the default, "" selects the invariant-character conversion

+ : fShortLength(0),  // NOTE(review): the function's own brace-only lines are not present in this diff rendering

+ fFlags(kShortString)

+ if(codepageData != 0) {  // a null input pointer leaves the freshly initialized string untouched

+ doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);  // byte count is measured with uprv_strlen()

+ }

+UnicodeString::UnicodeString(const char *codepageData,  // Constructs the string from dataLength bytes in the named codepage.

+ int32_t dataLength,  // forwarded unchanged; doCodepageCreate() ignores 0 and values below -1, and measures the input itself for -1

+ const char *codepage)  // forwarded to doCodepageCreate(): 0 selects the default, "" selects the invariant-character conversion

+ : fShortLength(0),  // NOTE(review): the function's own brace-only lines are not present in this diff rendering

+ fFlags(kShortString)

+ if(codepageData != 0) {  // a null input pointer leaves the freshly initialized string untouched

+ doCodepageCreate(codepageData, dataLength, codepage);

+ }

+UnicodeString::UnicodeString(const char *src, int32_t srcLength,  // Constructs the string by converting src with cnv; failures are reported through errorCode and leave the string bogus.

+ UConverter *cnv,  // may be 0, in which case the default converter is borrowed for the duration of the call

+ UErrorCode &errorCode)  // in/out: nothing is converted if it already holds a failure on entry

+ : fShortLength(0),  // NOTE(review): the function's brace-only lines and several closing braces are not present in this diff rendering

+ fFlags(kShortString)

+ if(U_SUCCESS(errorCode)) {

+ // check arguments

+ if(src==NULL) {

+ // treat as an empty string, do nothing more

+ } else if(srcLength<-1) {  // -1 is the only negative length accepted

+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ } else {

+ // get input length

+ if(srcLength==-1) {  // -1 means "measure src with uprv_strlen()"

+ srcLength=(int32_t)uprv_strlen(src);

+ }

+ if(srcLength>0) {  // zero-length input needs no conversion

+ if(cnv!=0) {

+ // use the provided converter

+ ucnv_resetToUnicode(cnv);  // the caller's converter is reset before it is used

+ doCodepageCreate(src, srcLength, cnv, errorCode);

+ } else {

+ // use the default converter

+ cnv=u_getDefaultConverter(&errorCode);

+ doCodepageCreate(src, srcLength, cnv, errorCode);  // returns immediately if obtaining the converter set a failure in errorCode

+ u_releaseDefaultConverter(cnv);  // handed back unconditionally, even after a failure

+ }

+ if(U_FAILURE(errorCode)) {  // covers both the illegal-argument case and conversion failures

+ setToBogus();

+ }

+//========================================

+// Codeset conversion

+//========================================

+#if !U_CHARSET_IS_UTF8

+int32_t  // Returns whatever the five-argument extract() overload returns.

+UnicodeString::extract(int32_t start,  // Convenience overload: extracts a range without naming a codepage.

+ int32_t length,

+ char *target,

+ uint32_t dstSize) const {  // NOTE(review): the matching closing brace is not present in this diff rendering

+ return extract(start, length, target, dstSize, 0);  // codepage 0 makes the callee take its default-codepage branch

+// else see unistr.cpp

+#endif

+int32_t  // Returns the value produced by u_terminateChars(), toUTF8() or doExtract(), or 0 for illegal arguments.

+UnicodeString::extract(int32_t start,  // Converts the range [start, start+length) into target using the named codepage.

+ int32_t length,

+ char *target,

+ uint32_t dstSize,  // size of target; values of 0x7fffffff and above are pinned below

+ const char *codepage) const  // 0 = default codepage, "" = invariant-character conversion, otherwise a name given to ucnv_open(); NOTE(review): the function's brace-only lines are not present in this diff rendering

+ // if the arguments are illegal, then do nothing

+ if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {  // dstSize is unsigned, so only the null-target case can be tested

+ return 0;

+ }

+ // pin the indices to legal values

+ pinIndices(start, length);

+ // We need to cast dstSize to int32_t for all subsequent code.

+ // I don't know why the API was defined with uint32_t but we are stuck with it.

+ // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize

+ // as a limit in some functions, it may wrap around and yield a pointer

+ // that compares less-than target.

+ int32_t capacity;

+ if(dstSize < 0x7fffffff) {

+ // Assume that the capacity is real and a limit pointer won't wrap around.

+ capacity = (int32_t)dstSize;

+ } else {

+ // Pin the capacity so that a limit pointer does not wrap around.

+ char *targetLimit = (char *)U_MAX_PTR(target);

+ // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff

+ // greater than target and does not wrap around the top of the address space.

+ capacity = (int32_t)(targetLimit - target);

+ }

+ // create the converter

+ UConverter *converter;

+ UErrorCode status = U_ZERO_ERROR;  // local only: this overload has no error parameter, so status never reaches the caller

+ // just write the NUL if the string length is 0

+ if(length == 0) {

+ return u_terminateChars(target, capacity, 0, &status);

+ }

+ // if the codepage is the default, use our cache

+ // if it is an empty string, then use the "invariant character" conversion

+ if (codepage == 0) {

+ const char *defaultName = ucnv_getDefaultName();

+ if(UCNV_FAST_IS_UTF8(defaultName)) {  // when the default name is UTF-8, bypass the converter and use toUTF8()

+ return toUTF8(start, length, target, capacity);

+ }

+ converter = u_getDefaultConverter(&status);

+ } else if (*codepage == 0) {

+ // use the "invariant characters" conversion

+ int32_t destLength;

+ if(length <= capacity) {  // copy no more units than the buffer can hold

+ destLength = length;

+ } else {

+ destLength = capacity;

+ }

+ u_UCharsToChars(getArrayStart() + start, target, destLength);

+ return u_terminateChars(target, capacity, length, &status);  // passes the full length, not the possibly truncated destLength

+ } else {

+ converter = ucnv_open(codepage, &status);

+ }

+ length = doExtract(start, length, target, capacity, converter, status);  // doExtract() returns 0 without converting if status already holds a failure

+ // close the converter

+ if (codepage == 0) {  // the default converter is released rather than closed

+ u_releaseDefaultConverter(converter);

+ } else {

+ ucnv_close(converter);

+ }

+ return length;

+int32_t  // Returns the value produced by doExtract() or u_terminateChars(), or 0 on any early error exit.

+UnicodeString::extract(char *dest, int32_t destCapacity,  // Converts the whole string into dest using cnv, reporting problems through errorCode.

+ UConverter *cnv,  // may be 0, in which case the default converter is borrowed for the duration of the call

+ UErrorCode &errorCode) const  // in/out: the call does nothing if it already holds a failure; NOTE(review): the function's brace-only lines are not present in this diff rendering

+ if(U_FAILURE(errorCode)) {

+ return 0;

+ }

+ if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {  // a bogus string, a negative capacity, or a null buffer with non-zero capacity is rejected

+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;

+ return 0;

+ }

+ // nothing to do?

+ if(isEmpty()) {

+ return u_terminateChars(dest, destCapacity, 0, &errorCode);

+ }

+ // get the converter

+ UBool isDefaultConverter;  // records which converter is in use so that only a borrowed default one is released below

+ if(cnv==0) {

+ isDefaultConverter=TRUE;

+ cnv=u_getDefaultConverter(&errorCode);

+ if(U_FAILURE(errorCode)) {  // on this path the function returns without calling u_releaseDefaultConverter()

+ return 0;

+ }

+ } else {

+ isDefaultConverter=FALSE;

+ ucnv_resetFromUnicode(cnv);  // the caller's converter is reset before it is used

+ }

+ // convert

+ int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);

+ // release the converter

+ if(isDefaultConverter) {  // a caller-supplied converter is left open for the caller

+ u_releaseDefaultConverter(cnv);

+ }

+ return len;

+int32_t  // Returns the value produced by u_terminateChars() for the total output length, or 0 if errorCode held a failure on entry.

+UnicodeString::doExtract(int32_t start, int32_t length,  // Converts length UChars starting at start into dest with cnv; on overflow keeps converting into a scratch buffer to measure the full output length.

+ char *dest, int32_t destCapacity,  // destCapacity 0 means "no output buffer", -1 is the "magic" value handled below

+ UConverter *cnv,

+ UErrorCode &errorCode) const  // NOTE(review): the function's brace-only lines are not present in this diff rendering

+ if(U_FAILURE(errorCode)) {

+ if(destCapacity!=0) {  // still leave an empty C string behind when there is a buffer to write to

+ *dest=0;

+ }

+ return 0;

+ }

+ const UChar *src=getArrayStart()+start, *srcLimit=src+length;

+ char *originalDest=dest;  // kept because ucnv_fromUnicode() is passed &dest and the output length is later computed as dest-originalDest

+ const char *destLimit;

+ if(destCapacity==0) {  // no buffer: dest and destLimit both become null

+ destLimit=dest=0;

+ } else if(destCapacity==-1) {

+ // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.

+ destLimit=(char*)U_MAX_PTR(dest);

+ // for NUL-termination, translate into highest int32_t

+ destCapacity=0x7fffffff;

+ } else {

+ destLimit=dest+destCapacity;

+ }

+ // perform the conversion

+ ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);

+ length=(int32_t)(dest-originalDest);  // from here on, length counts output bytes rather than input UChars

+ // if an overflow occurs, then get the preflighting length

+ if(errorCode==U_BUFFER_OVERFLOW_ERROR) {

+ char buffer[1024];  // scratch space: its contents are overwritten on every pass and only the byte count is used

+ destLimit=buffer+sizeof(buffer);

+ do {

+ dest=buffer;  // restart at the beginning of the scratch buffer on each pass

+ errorCode=U_ZERO_ERROR;

+ ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);  // src was advanced by the previous call, so this continues from there

+ length+=(int32_t)(dest-buffer);

+ } while(errorCode==U_BUFFER_OVERFLOW_ERROR);

+ }

+ return u_terminateChars(originalDest, destCapacity, length, &errorCode);

+void  // Reports failure only by marking the string bogus; the local status is not returned.

+UnicodeString::doCodepageCreate(const char *codepageData,  // Fills this string from codepage bytes, picking the conversion path from the codepage argument.

+ int32_t dataLength,  // -1 means "measure codepageData with uprv_strlen()"

+ const char *codepage)  // 0 = default codepage, "" = invariant-character conversion, otherwise a name given to ucnv_open(); NOTE(review): the function's brace-only lines are not present in this diff rendering

+ // if there's nothing to convert, do nothing

+ if(codepageData == 0 || dataLength == 0 || dataLength < -1) {  // lengths below -1 are silently ignored rather than reported

+ return;

+ }

+ if(dataLength == -1) {

+ dataLength = (int32_t)uprv_strlen(codepageData);

+ }

+ UErrorCode status = U_ZERO_ERROR;

+ // create the converter

+ // if the codepage is the default, use our cache

+ // if it is an empty string, then use the "invariant character" conversion

+ UConverter *converter;

+ if (codepage == 0) {

+ const char *defaultName = ucnv_getDefaultName();

+ if(UCNV_FAST_IS_UTF8(defaultName)) {  // when the default name is UTF-8, bypass the converter and use setToUTF8()

+ setToUTF8(StringPiece(codepageData, dataLength));

+ return;

+ }

+ converter = u_getDefaultConverter(&status);

+ } else if(*codepage == 0) {

+ // use the "invariant characters" conversion

+ if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {  // FALSE: the existing contents need not be copied

+ u_charsToUChars(codepageData, getArrayStart(), dataLength);

+ setLength(dataLength);

+ } else {

+ setToBogus();

+ }

+ return;

+ } else {

+ converter = ucnv_open(codepage, &status);

+ }

+ // if we failed, set the appropriate flags and return

+ if(U_FAILURE(status)) {  // returns before the release/close calls at the end of the function

+ setToBogus();

+ return;

+ }

+ // perform the conversion

+ doCodepageCreate(codepageData, dataLength, converter, status);

+ if(U_FAILURE(status)) {

+ setToBogus();

+ }

+ // close the converter

+ if(codepage == 0) {  // the default converter is released rather than closed

+ u_releaseDefaultConverter(converter);

+ } else {

+ ucnv_close(converter);

+ }

+void

+UnicodeString::doCodepageCreate(const char *codepageData,  // Converts dataLength bytes with the given converter into this string, growing the buffer and retrying on overflow.

+ int32_t dataLength,

+ UConverter *converter,

+ UErrorCode &status)  // in/out: nothing happens if it already holds a failure; NOTE(review): the function's brace-only lines and the loop's closing braces are not present in this diff rendering

+ if(U_FAILURE(status)) {

+ return;

+ }

+ // set up the conversion parameters

+ const char *mySource = codepageData;

+ const char *mySourceEnd = mySource + dataLength;

+ UChar *array, *myTarget;

+ // estimate the size needed:

+ int32_t arraySize;

+ if(dataLength <= US_STACKBUF_SIZE) {

+ // try to use the stack buffer

+ arraySize = US_STACKBUF_SIZE;

+ } else {

+ // 1.25 UChar's per source byte should cover most cases

+ arraySize = dataLength + (dataLength >> 2);

+ }

+ // we do not care about the current contents

+ UBool doCopyArray = FALSE;

+ for(;;) {  // one pass per buffer size; the only exits are the two break statements below

+ if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {  // the buffer could not be provided: mark the string bogus and leave status unchanged

+ setToBogus();

+ break;

+ }

+ // perform the conversion

+ array = getArrayStart();

+ myTarget = array + length();  // write after whatever earlier passes already produced

+ ucnv_toUnicode(converter, &myTarget, array + getCapacity(),

+ &mySource, mySourceEnd, 0, TRUE, &status);

+ // update the conversion parameters

+ setLength((int32_t)(myTarget - array));

+ // allocate more space and copy data, if needed

+ if(status == U_BUFFER_OVERFLOW_ERROR) {

+ // reset the error code

+ status = U_ZERO_ERROR;

+ // keep the previous conversion results

+ doCopyArray = TRUE;

+ // estimate the new size needed, larger than before

+ // try 2 UChar's per remaining source byte

+ arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));

+ } else {  // success or any other error ends the loop with status left as set by ucnv_toUnicode()

+ break;

+ }

+U_NAMESPACE_END

+#endif

Property changes on: icu46/source/common/unistr_cnv.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/common/unistr_case.cpp ('k') | icu46/source/common/unistr_props.cpp » ('j') | no next file with comments »