icu46/source/common/unistr_cnv.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/unistr_cnv.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 1999-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: unistr_cnv.cpp

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:2

	12 *

	13 * created on: 2004aug19

	14 * created by: Markus W. Scherer

	15 *

	16 * Character conversion functions moved here from unistr.cpp

	17 */

	18

	19 #include "unicode/utypes.h"

	20

	21 #if !UCONFIG_NO_CONVERSION

	22

	23 #include "unicode/putil.h"

	24 #include "cstring.h"

	25 #include "cmemory.h"

	26 #include "unicode/ustring.h"

	27 #include "unicode/unistr.h"

	28 #include "unicode/ucnv.h"

	29 #include "ucnv_imp.h"

	30 #include "putilimp.h"

	31 #include "ustr_cnv.h"

	32 #include "ustr_imp.h"

	33

	34 U_NAMESPACE_BEGIN

	35

	36 //========================================

	37 // Constructors

	38 //========================================

	39

	40 #if !U_CHARSET_IS_UTF8

	41

	42 UnicodeString::UnicodeString(const char *codepageData)

	43 : fShortLength(0),

	44 fFlags(kShortString)

	45 {

	46 if(codepageData != 0) {

	47 doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);

	48 }

	49 }

	50

	51 UnicodeString::UnicodeString(const char *codepageData,

	52 int32_t dataLength)

	53 : fShortLength(0),

	54 fFlags(kShortString)

	55 {

	56 if(codepageData != 0) {

	57 doCodepageCreate(codepageData, dataLength, 0);

	58 }

	59 }

	60

	61 // else see unistr.cpp

	62 #endif

	63

	64 UnicodeString::UnicodeString(const char *codepageData,

	65 const char *codepage)

	66 : fShortLength(0),

	67 fFlags(kShortString)

	68 {

	69 if(codepageData != 0) {

	70 doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);

	71 }

	72 }

	73

	74 UnicodeString::UnicodeString(const char *codepageData,

	75 int32_t dataLength,

	76 const char *codepage)

	77 : fShortLength(0),

	78 fFlags(kShortString)

	79 {

	80 if(codepageData != 0) {

	81 doCodepageCreate(codepageData, dataLength, codepage);

	82 }

	83 }

	84

	85 UnicodeString::UnicodeString(const char *src, int32_t srcLength,

	86 UConverter *cnv,

	87 UErrorCode &errorCode)

	88 : fShortLength(0),

	89 fFlags(kShortString)

	90 {

	91 if(U_SUCCESS(errorCode)) {

	92 // check arguments

	93 if(src==NULL) {

	94 // treat as an empty string, do nothing more

	95 } else if(srcLength<-1) {

	96 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	97 } else {

	98 // get input length

	99 if(srcLength==-1) {

	100 srcLength=(int32_t)uprv_strlen(src);

	101 }

	102 if(srcLength>0) {

	103 if(cnv!=0) {

	104 // use the provided converter

	105 ucnv_resetToUnicode(cnv);

	106 doCodepageCreate(src, srcLength, cnv, errorCode);

	107 } else {

	108 // use the default converter

	109 cnv=u_getDefaultConverter(&errorCode);

	110 doCodepageCreate(src, srcLength, cnv, errorCode);

	111 u_releaseDefaultConverter(cnv);

	112 }

	113 }

	114 }

	115

	116 if(U_FAILURE(errorCode)) {

	117 setToBogus();

	118 }

	119 }

	120 }

	121

	122 //========================================

	123 // Codeset conversion

	124 //========================================

	125

	126 #if !U_CHARSET_IS_UTF8

	127

	128 int32_t

	129 UnicodeString::extract(int32_t start,

	130 int32_t length,

	131 char *target,

	132 uint32_t dstSize) const {

	133 return extract(start, length, target, dstSize, 0);

	134 }

	135

	136 // else see unistr.cpp

	137 #endif

	138

	139 int32_t

	140 UnicodeString::extract(int32_t start,

	141 int32_t length,

	142 char *target,

	143 uint32_t dstSize,

	144 const char *codepage) const

	145 {

	146 // if the arguments are illegal, then do nothing

	147 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {

	148 return 0;

	149 }

	150

	151 // pin the indices to legal values

	152 pinIndices(start, length);

	153

	154 // We need to cast dstSize to int32_t for all subsequent code.

	155 // I don't know why the API was defined with uint32_t but we are stuck with it.

	156 // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize

	157 // as a limit in some functions, it may wrap around and yield a pointer

	158 // that compares less-than target.

	159 int32_t capacity;

	160 if(dstSize < 0x7fffffff) {

	161 // Assume that the capacity is real and a limit pointer won't wrap around.

	162 capacity = (int32_t)dstSize;

	163 } else {

	164 // Pin the capacity so that a limit pointer does not wrap around.

	165 char *targetLimit = (char *)U_MAX_PTR(target);

	166 // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff

	167 // greater than target and does not wrap around the top of the address space.

	168 capacity = (int32_t)(targetLimit - target);

	169 }

	170

	171 // create the converter

	172 UConverter *converter;

	173 UErrorCode status = U_ZERO_ERROR;

	174

	175 // just write the NUL if the string length is 0

	176 if(length == 0) {

	177 return u_terminateChars(target, capacity, 0, &status);

	178 }

	179

	180 // if the codepage is the default, use our cache

	181 // if it is an empty string, then use the "invariant character" conversion

	182 if (codepage == 0) {

	183 const char *defaultName = ucnv_getDefaultName();

	184 if(UCNV_FAST_IS_UTF8(defaultName)) {

	185 return toUTF8(start, length, target, capacity);

	186 }

	187 converter = u_getDefaultConverter(&status);

	188 } else if (*codepage == 0) {

	189 // use the "invariant characters" conversion

	190 int32_t destLength;

	191 if(length <= capacity) {

	192 destLength = length;

	193 } else {

	194 destLength = capacity;

	195 }

	196 u_UCharsToChars(getArrayStart() + start, target, destLength);

	197 return u_terminateChars(target, capacity, length, &status);

	198 } else {

	199 converter = ucnv_open(codepage, &status);

	200 }

	201

	202 length = doExtract(start, length, target, capacity, converter, status);

	203

	204 // close the converter

	205 if (codepage == 0) {

	206 u_releaseDefaultConverter(converter);

	207 } else {

	208 ucnv_close(converter);

	209 }

	210

	211 return length;

	212 }

	213

	214 int32_t

	215 UnicodeString::extract(char *dest, int32_t destCapacity,

	216 UConverter *cnv,

	217 UErrorCode &errorCode) const

	218 {

	219 if(U_FAILURE(errorCode)) {

	220 return 0;

	221 }

	222

	223 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {

	224 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	225 return 0;

	226 }

	227

	228 // nothing to do?

	229 if(isEmpty()) {

	230 return u_terminateChars(dest, destCapacity, 0, &errorCode);

	231 }

	232

	233 // get the converter

	234 UBool isDefaultConverter;

	235 if(cnv==0) {

	236 isDefaultConverter=TRUE;

	237 cnv=u_getDefaultConverter(&errorCode);

	238 if(U_FAILURE(errorCode)) {

	239 return 0;

	240 }

	241 } else {

	242 isDefaultConverter=FALSE;

	243 ucnv_resetFromUnicode(cnv);

	244 }

	245

	246 // convert

	247 int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);

	248

	249 // release the converter

	250 if(isDefaultConverter) {

	251 u_releaseDefaultConverter(cnv);

	252 }

	253

	254 return len;

	255 }

	256

	257 int32_t

	258 UnicodeString::doExtract(int32_t start, int32_t length,

	259 char *dest, int32_t destCapacity,

	260 UConverter *cnv,

	261 UErrorCode &errorCode) const

	262 {

	263 if(U_FAILURE(errorCode)) {

	264 if(destCapacity!=0) {

	265 *dest=0;

	266 }

	267 return 0;

	268 }

	269

	270 const UChar *src=getArrayStart()+start, *srcLimit=src+length;

	271 char *originalDest=dest;

	272 const char *destLimit;

	273

	274 if(destCapacity==0) {

	275 destLimit=dest=0;

	276 } else if(destCapacity==-1) {

	277 // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.

	278 destLimit=(char*)U_MAX_PTR(dest);

	279 // for NUL-termination, translate into highest int32_t

	280 destCapacity=0x7fffffff;

	281 } else {

	282 destLimit=dest+destCapacity;

	283 }

	284

	285 // perform the conversion

	286 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);

	287 length=(int32_t)(dest-originalDest);

	288

	289 // if an overflow occurs, then get the preflighting length

	290 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {

	291 char buffer[1024];

	292

	293 destLimit=buffer+sizeof(buffer);

	294 do {

	295 dest=buffer;

	296 errorCode=U_ZERO_ERROR;

	297 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);

	298 length+=(int32_t)(dest-buffer);

	299 } while(errorCode==U_BUFFER_OVERFLOW_ERROR);

	300 }

	301

	302 return u_terminateChars(originalDest, destCapacity, length, &errorCode);

	303 }

	304

	305 void

	306 UnicodeString::doCodepageCreate(const char *codepageData,

	307 int32_t dataLength,

	308 const char *codepage)

	309 {

	310 // if there's nothing to convert, do nothing

	311 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {

	312 return;

	313 }

	314 if(dataLength == -1) {

	315 dataLength = (int32_t)uprv_strlen(codepageData);

	316 }

	317

	318 UErrorCode status = U_ZERO_ERROR;

	319

	320 // create the converter

	321 // if the codepage is the default, use our cache

	322 // if it is an empty string, then use the "invariant character" conversion

	323 UConverter *converter;

	324 if (codepage == 0) {

	325 const char *defaultName = ucnv_getDefaultName();

	326 if(UCNV_FAST_IS_UTF8(defaultName)) {

	327 setToUTF8(StringPiece(codepageData, dataLength));

	328 return;

	329 }

	330 converter = u_getDefaultConverter(&status);

	331 } else if(*codepage == 0) {

	332 // use the "invariant characters" conversion

	333 if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {

	334 u_charsToUChars(codepageData, getArrayStart(), dataLength);

	335 setLength(dataLength);

	336 } else {

	337 setToBogus();

	338 }

	339 return;

	340 } else {

	341 converter = ucnv_open(codepage, &status);

	342 }

	343

	344 // if we failed, set the appropriate flags and return

	345 if(U_FAILURE(status)) {

	346 setToBogus();

	347 return;

	348 }

	349

	350 // perform the conversion

	351 doCodepageCreate(codepageData, dataLength, converter, status);

	352 if(U_FAILURE(status)) {

	353 setToBogus();

	354 }

	355

	356 // close the converter

	357 if(codepage == 0) {

	358 u_releaseDefaultConverter(converter);

	359 } else {

	360 ucnv_close(converter);

	361 }

	362 }

	363

	364 void

	365 UnicodeString::doCodepageCreate(const char *codepageData,

	366 int32_t dataLength,

	367 UConverter *converter,

	368 UErrorCode &status)

	369 {

	370 if(U_FAILURE(status)) {

	371 return;

	372 }

	373

	374 // set up the conversion parameters

	375 const char *mySource = codepageData;

	376 const char *mySourceEnd = mySource + dataLength;

	377 UChar *array, *myTarget;

	378

	379 // estimate the size needed:

	380 int32_t arraySize;

	381 if(dataLength <= US_STACKBUF_SIZE) {

	382 // try to use the stack buffer

	383 arraySize = US_STACKBUF_SIZE;

	384 } else {

	385 // 1.25 UChar's per source byte should cover most cases

	386 arraySize = dataLength + (dataLength >> 2);

	387 }

	388

	389 // we do not care about the current contents

	390 UBool doCopyArray = FALSE;

	391 for(;;) {

	392 if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {

	393 setToBogus();

	394 break;

	395 }

	396

	397 // perform the conversion

	398 array = getArrayStart();

	399 myTarget = array + length();

	400 ucnv_toUnicode(converter, &myTarget, array + getCapacity(),

	401 &mySource, mySourceEnd, 0, TRUE, &status);

	402

	403 // update the conversion parameters

	404 setLength((int32_t)(myTarget - array));

	405

	406 // allocate more space and copy data, if needed

	407 if(status == U_BUFFER_OVERFLOW_ERROR) {

	408 // reset the error code

	409 status = U_ZERO_ERROR;

	410

	411 // keep the previous conversion results

	412 doCopyArray = TRUE;

	413

	414 // estimate the new size needed, larger than before

	415 // try 2 UChar's per remaining source byte

	416 arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));

	417 } else {

	418 break;

	419 }

	420 }

	421 }

	422

	423 U_NAMESPACE_END

	424

	425 #endif

OLD	NEW

« no previous file with comments | « icu46/source/common/unistr_case.cpp ('k') | icu46/source/common/unistr_props.cpp » ('j') | no next file with comments »