Index: icu46/source/common/unistr_cnv.cpp |
=================================================================== |
--- icu46/source/common/unistr_cnv.cpp (revision 0) |
+++ icu46/source/common/unistr_cnv.cpp (revision 0) |
@@ -0,0 +1,425 @@ |
+/* |
+******************************************************************************* |
+* |
+* Copyright (C) 1999-2010, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+******************************************************************************* |
+* file name: unistr_cnv.cpp |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:2 |
+* |
+* created on: 2004aug19 |
+* created by: Markus W. Scherer |
+* |
+* Character conversion functions moved here from unistr.cpp |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_CONVERSION |
+ |
+#include "unicode/putil.h" |
+#include "cstring.h" |
+#include "cmemory.h" |
+#include "unicode/ustring.h" |
+#include "unicode/unistr.h" |
+#include "unicode/ucnv.h" |
+#include "ucnv_imp.h" |
+#include "putilimp.h" |
+#include "ustr_cnv.h" |
+#include "ustr_imp.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+//======================================== |
+// Constructors |
+//======================================== |
+ |
+#if !U_CHARSET_IS_UTF8 |
+ |
+// Creates the string from NUL-terminated bytes in the default codepage; |
+// a null pointer leaves the string empty. |
+UnicodeString::UnicodeString(const char *codepageData) |
+  : fShortLength(0), fFlags(kShortString) |
+{ |
+  if(codepageData == 0) { return; } |
+  doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0); |
+} |
+ |
+// Creates the string from dataLength bytes in the default codepage. |
+// A null pointer leaves the string empty; dataLength==-1 and invalid |
+// lengths are dealt with inside doCodepageCreate(). |
+UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) |
+  : fShortLength(0), fFlags(kShortString) |
+{ |
+  if(codepageData == 0) { return; } |
+  doCodepageCreate(codepageData, dataLength, 0); |
+} |
+ |
+// else see unistr.cpp |
+#endif |
+ |
+// Creates the string from NUL-terminated bytes in the named codepage |
+// (0 = default converter, "" = invariant characters); null data = empty. |
+UnicodeString::UnicodeString(const char *codepageData, const char *codepage) |
+  : fShortLength(0), fFlags(kShortString) |
+{ |
+  if(codepageData == 0) { return; } |
+  const int32_t dataLength = (int32_t)uprv_strlen(codepageData); |
+  doCodepageCreate(codepageData, dataLength, codepage); |
+} |
+ |
+// Creates the string from dataLength bytes in the named codepage |
+// (0 = default converter, "" = invariant characters). |
+// A null data pointer leaves the string empty. |
+UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength, |
+                             const char *codepage) |
+  : fShortLength(0), fFlags(kShortString) |
+{ |
+  if(codepageData == 0) { return; } |
+  doCodepageCreate(codepageData, dataLength, codepage); |
+} |
+ |
+// Creates the string by converting src with cnv, or with the default |
+// converter when cnv is 0. srcLength==-1 means src is NUL-terminated. |
+// A null src leaves the string empty; an error raised here makes it bogus. |
+// Does nothing if errorCode already indicates a failure on entry. |
+UnicodeString::UnicodeString(const char *src, int32_t srcLength, |
+                             UConverter *cnv, UErrorCode &errorCode) |
+  : fShortLength(0), fFlags(kShortString) |
+{ |
+  // nothing to do for a prior error or a missing source |
+  if(U_FAILURE(errorCode) || src==NULL) { |
+    return; |
+  } |
+  // -1 is the only negative length that is accepted |
+  if(srcLength<-1) { |
+    errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+    setToBogus(); |
+    return; |
+  } |
+  if(srcLength==-1) { |
+    srcLength=(int32_t)uprv_strlen(src); |
+  } |
+ |
+  if(srcLength>0) { |
+    const UBool useDefaultConverter=(UBool)(cnv==0); |
+    if(useDefaultConverter) { |
+      cnv=u_getDefaultConverter(&errorCode); |
+    } else { |
+      ucnv_resetToUnicode(cnv); |
+    } |
+    doCodepageCreate(src, srcLength, cnv, errorCode); |
+    if(useDefaultConverter) { |
+      u_releaseDefaultConverter(cnv); |
+    } |
+  } |
+  if(U_FAILURE(errorCode)) { setToBogus(); } |
+} |
+ |
+//======================================== |
+// Codeset conversion |
+//======================================== |
+ |
+#if !U_CHARSET_IS_UTF8 |
+ |
+// Default-codepage variant: forwards to the codepage overload with 0. |
+int32_t |
+UnicodeString::extract(int32_t start, int32_t length, |
+                       char *target, uint32_t dstSize) const |
+{ |
+  return extract(start, length, target, dstSize, 0); |
+} |
+ |
+// else see unistr.cpp |
+#endif |
+ |
+// Converts the UChars in [start, start+length) to bytes in target, which |
+// holds dstSize bytes. codepage 0 selects the default converter and "" the |
+// invariant-character conversion; any other name opens that converter. |
+// Returns 0 for illegal arguments, otherwise the result of the helper |
+// that produced the output (u_terminateChars, toUTF8 or doExtract). |
+int32_t |
+UnicodeString::extract(int32_t start, int32_t length, |
+                       char *target, uint32_t dstSize, |
+                       const char *codepage) const |
+{ |
+  // a buffer size without a buffer is illegal: do nothing |
+  if(target == 0 && dstSize > 0) { |
+    return 0; |
+  } |
+ |
+  // clamp start and length to legal values |
+  pinIndices(start, length); |
+ |
+  // All later code needs the size as an int32_t even though the API takes |
+  // a uint32_t. dstSize==0xffffffff means "unlimited", but target+dstSize |
+  // used as a limit could wrap around and compare less-than target. |
+  int32_t capacity; |
+  if(dstSize >= 0x7fffffff) { |
+    // Pin the capacity: U_MAX_PTR(target) is at most 0x7fffffff greater |
+    // than target and does not wrap around the top of the address space. |
+    char *pinnedLimit = (char *)U_MAX_PTR(target); |
+    capacity = (int32_t)(pinnedLimit - target); |
+  } else { |
+    // Trust the size: a limit pointer will not wrap around. |
+    capacity = (int32_t)dstSize; |
+  } |
+ |
+  // this API has no error code parameter, so the status stays local |
+  UErrorCode status = U_ZERO_ERROR; |
+ |
+  // an empty range only needs the terminating NUL |
+  if(length == 0) { |
+    return u_terminateChars(target, capacity, 0, &status); |
+  } |
+ |
+  // "" requests the "invariant characters" conversion: no converter needed |
+  if(codepage != 0 && *codepage == 0) { |
+    const int32_t copyLength = (length <= capacity) ? length : capacity; |
+    u_UCharsToChars(getArrayStart() + start, target, copyLength); |
+    return u_terminateChars(target, capacity, length, &status); |
+  } |
+ |
+  // pick the converter: the cached default one, or one opened by name |
+  const UBool useDefault = (UBool)(codepage == 0); |
+  UConverter *converter; |
+  if(useDefault) { |
+    const char *defaultName = ucnv_getDefaultName(); |
+    if(UCNV_FAST_IS_UTF8(defaultName)) { |
+      // the default codepage is UTF-8: no converter object is needed |
+      return toUTF8(start, length, target, capacity); |
+    } |
+    converter = u_getDefaultConverter(&status); |
+  } else { |
+    converter = ucnv_open(codepage, &status); |
+  } |
+ |
+  // convert; doExtract() checks status itself before using the converter |
+  const int32_t extracted = |
+      doExtract(start, length, target, capacity, converter, status); |
+ |
+  // hand the converter back the same way it was obtained |
+  if(useDefault) { |
+    u_releaseDefaultConverter(converter); |
+  } else { |
+    ucnv_close(converter); |
+  } |
+ |
+  return extracted; |
+} |
+ |
+// Converts the whole string to bytes in dest (destCapacity bytes) with cnv, |
+// or with the default converter when cnv is 0. Sets U_ILLEGAL_ARGUMENT_ERROR |
+// for a bogus string, a negative capacity, or a positive capacity without a |
+// buffer. Returns 0 for errors detected here, else the helper's result. |
+int32_t |
+UnicodeString::extract(char *dest, int32_t destCapacity, |
+                       UConverter *cnv, UErrorCode &errorCode) const |
+{ |
+  if(U_FAILURE(errorCode)) { |
+    return 0; |
+  } |
+ |
+  // validate this string and the description of the output buffer |
+  const UBool missingBuffer=(UBool)(dest==0 && destCapacity>0); |
+  if(isBogus() || destCapacity<0 || missingBuffer) { |
+    errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+    return 0; |
+  } |
+ |
+  // an empty string only needs termination |
+  if(isEmpty()) { |
+    return u_terminateChars(dest, destCapacity, 0, &errorCode); |
+  } |
+ |
+  // borrow the default converter if the caller did not supply one; |
+  // a supplied converter is reset before use |
+  const UBool borrowedDefault=(UBool)(cnv==0); |
+  if(!borrowedDefault) { |
+    ucnv_resetFromUnicode(cnv); |
+  } else { |
+    cnv=u_getDefaultConverter(&errorCode); |
+    if(U_FAILURE(errorCode)) { |
+      return 0; |
+    } |
+  } |
+ |
+  const int32_t converted=doExtract(0, length(), dest, destCapacity, cnv, errorCode); |
+  if(borrowedDefault) { |
+    u_releaseDefaultConverter(cnv); |
+  } |
+  return converted; |
+} |
+ |
+// Converts length UChars at offset start into dest using cnv and counts the |
+// full output length even on overflow. destCapacity 0: count only; -1: no limit. |
+// Returns the u_terminateChars() result, or 0 if errorCode was already a failure. |
+int32_t |
+UnicodeString::doExtract(int32_t start, int32_t length, |
+                         char *dest, int32_t destCapacity, |
+                         UConverter *cnv, UErrorCode &errorCode) const |
+{ |
+  if(U_FAILURE(errorCode)) { |
+    if(destCapacity!=0) { *dest=0; } |
+    return 0; |
+  } |
+ |
+  const UChar *src=getArrayStart()+start, *srcLimit=src+length; |
+  char *originalDest=dest; |
+  const char *destLimit; |
+ |
+  if(destCapacity==0) { |
+    destLimit=dest=0; |
+  } else if(destCapacity==-1) { |
+    // "magic" capacity: pin the limit to U_MAX_PTR; NUL-termination uses the highest int32_t |
+    destLimit=(char*)U_MAX_PTR(dest); |
+    destCapacity=0x7fffffff; |
+  } else { |
+    destLimit=dest+destCapacity; |
+  } |
+ |
+  // Convert. Measure output from where writing really starts: if destCapacity==0, |
+  // dest was reset to 0 above and the caller's pointer must not be subtracted. |
+  const char *convStart=dest; |
+  ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); |
+  length=(int32_t)(dest-convStart); |
+ |
+  // if an overflow occurs, then get the preflighting length |
+  if(errorCode==U_BUFFER_OVERFLOW_ERROR) { |
+    char buffer[1024]; |
+    destLimit=buffer+sizeof(buffer); |
+    do { |
+      dest=buffer; |
+      errorCode=U_ZERO_ERROR; |
+      ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); |
+      length+=(int32_t)(dest-buffer); |
+    } while(errorCode==U_BUFFER_OVERFLOW_ERROR); |
+  } |
+ |
+  return u_terminateChars(originalDest, destCapacity, length, &errorCode); |
+} |
+ |
+// Sets this string from codepageData converted with the named codepage: |
+// 0 selects the default converter, "" the invariant-character conversion. |
+// dataLength==-1 means NUL-terminated. Null, empty or invalid-length input |
+// leaves the string untouched; any failure makes the string bogus. |
+void |
+UnicodeString::doCodepageCreate(const char *codepageData, int32_t dataLength, |
+                                const char *codepage) |
+{ |
+  if(codepageData == 0 || dataLength == 0 || dataLength < -1) { |
+    return; |
+  } |
+  if(dataLength == -1) { |
+    dataLength = (int32_t)uprv_strlen(codepageData); |
+  } |
+ |
+  // "" requests the "invariant characters" conversion: no converter needed |
+  if(codepage != 0 && *codepage == 0) { |
+    if(!cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { |
+      setToBogus(); |
+      return; |
+    } |
+    u_charsToUChars(codepageData, getArrayStart(), dataLength); |
+    setLength(dataLength); |
+    return; |
+  } |
+ |
+  // pick the converter: the cached default one, or one opened by name |
+  UErrorCode status = U_ZERO_ERROR; |
+  const UBool useDefault = (UBool)(codepage == 0); |
+  UConverter *converter; |
+  if(useDefault) { |
+    const char *defaultName = ucnv_getDefaultName(); |
+    if(UCNV_FAST_IS_UTF8(defaultName)) { |
+      setToUTF8(StringPiece(codepageData, dataLength)); |
+      return; |
+    } |
+    converter = u_getDefaultConverter(&status); |
+  } else { |
+    converter = ucnv_open(codepage, &status); |
+  } |
+ |
+  // without a converter the string becomes bogus |
+  if(U_FAILURE(status)) { |
+    setToBogus(); |
+    return; |
+  } |
+ |
+  // convert; a conversion failure also makes the string bogus |
+  doCodepageCreate(codepageData, dataLength, converter, status); |
+  if(U_FAILURE(status)) { setToBogus(); } |
+ |
+  // hand the converter back the same way it was obtained |
+  if(useDefault) { |
+    u_releaseDefaultConverter(converter); |
+  } else { |
+    ucnv_close(converter); |
+  } |
+} |
+ |
+void |
+UnicodeString::doCodepageCreate(const char *codepageData, |
+ int32_t dataLength, |
+ UConverter *converter, |
+ UErrorCode &status) |
+{ /* Converts dataLength bytes at codepageData to UChars with converter, enlarging the array after each overflow. */ |
+ if(U_FAILURE(status)) { /* the caller already has an error: do nothing */ |
+ return; |
+ } |
+ |
+ // set up the conversion parameters: the source byte range and the output cursors |
+ const char *mySource = codepageData; |
+ const char *mySourceEnd = mySource + dataLength; |
+ UChar *array, *myTarget; |
+ |
+ // estimate the size needed for the first attempt: |
+ int32_t arraySize; |
+ if(dataLength <= US_STACKBUF_SIZE) { |
+ // try to use the stack buffer |
+ arraySize = US_STACKBUF_SIZE; |
+ } else { |
+ // 1.25 UChar's per source byte should cover most cases |
+ arraySize = dataLength + (dataLength >> 2); |
+ } |
+ |
+ // we do not care about the current contents, so the first pass does not copy them |
+ UBool doCopyArray = FALSE; |
+ for(;;) { /* one pass per array size; left via break on success, other errors, or allocation failure */ |
+ if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) { |
+ setToBogus(); /* NOTE(review): status is not changed on this path; callers only see the bogus string */ |
+ break; |
+ } |
+ |
+ // perform the conversion, writing after the UChars already in the string |
+ array = getArrayStart(); |
+ myTarget = array + length(); |
+ ucnv_toUnicode(converter, &myTarget, array + getCapacity(), |
+ &mySource, mySourceEnd, 0, TRUE, &status); |
+ |
+ // update the conversion parameters: the string length now covers everything written so far |
+ setLength((int32_t)(myTarget - array)); |
+ |
+ // allocate more space and copy data, if needed |
+ if(status == U_BUFFER_OVERFLOW_ERROR) { |
+ // reset the error code so that the next pass can continue converting |
+ status = U_ZERO_ERROR; |
+ |
+ // keep the previous conversion results |
+ doCopyArray = TRUE; |
+ |
+ // estimate the new size needed, larger than before |
+ // try 2 UChar's per remaining source byte |
+ arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource)); |
+ } else { |
+ break; /* finished, or failed with an error other than buffer overflow */ |
+ } |
+ } |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif |
Property changes on: icu46/source/common/unistr_cnv.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |