icu46/source/common/ucnv_u7.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/ucnv_u7.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 2002-2009, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * file name: ucnv_u7.c

	7 * encoding: US-ASCII

	8 * tab size: 8 (not used)

	9 * indentation:4

	10 *

	11 * created on: 2002jul01

	12 * created by: Markus W. Scherer

	13 *

	14 * UTF-7 converter implementation. Used to be in ucnv_utf.c.

	15 */

	16

	17 #include "unicode/utypes.h"

	18

	19 #if !UCONFIG_NO_CONVERSION

	20

	21 #include "unicode/ucnv.h"

	22 #include "ucnv_bld.h"

	23 #include "ucnv_cnv.h"

	24

	25 /* UTF-7 -------------------------------------------------------------------- */

	26

	27 /*

	28 * UTF-7 is a stateful encoding of Unicode.

	29 * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)

	30 * It was intended for use in Internet email systems, using in its bytewise

	31 * encoding only a subset of 7-bit US-ASCII.

	32 * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still

	33 * occasionally used.

	34 *

	35 * For converting Unicode to UTF-7, the RFC allows to encode some US-ASCII

	36 * characters directly or in base64. Especially, the characters in set O

	37 * as defined in the RFC (see below) may be encoded directly but are not

	38 * allowed in, e.g., email headers.

	39 * By default, the ICU UTF-7 converter encodes set O directly.

	40 * By choosing the option "version=1", set O will be escaped instead.

	41 * For example:

	42 * utf7Converter=ucnv_open("UTF-7,version=1");

	43 *

	44 * For details about email headers see RFC 2047.

	45 */

	46

	47 /*

	48 * Tests for US-ASCII characters belonging to character classes

	49 * defined in UTF-7.

	50 *

	51 * Set D (directly encoded characters) consists of the following

	52 * characters: the upper and lower case letters A through Z

	53 * and a through z, the 10 digits 0-9, and the following nine special

	54 * characters (note that "+" and "=" are omitted):

	55 * '(),-./:?

	56 *

	57 * Set O (optional direct characters) consists of the following

	58 * characters (note that "\" and "~" are omitted):

	59 * !"#$%&*;<=>@[]^_`{|}

	60 *

	61 * According to the rules in RFC 2152, the byte values for the following

	62 * US-ASCII characters are not used in UTF-7 and are therefore illegal:

	63 * - all C0 control codes except for CR LF TAB

	64 * - BACKSLASH

	65 * - TILDE

	66 * - DEL

	67 * - all codes beyond US-ASCII, i.e. all >127

	68 */

	69 #define inSetD(c) \

	70 ((uint8_t)((c)-97)<26 \|\| (uint8_t)((c)-65)<26 \|\| /* letters */ \

	71 (uint8_t)((c)-48)<10 \|\| /* digits */ \

	72 (uint8_t)((c)-39)<3 \|\| /* '() */ \

	73 (uint8_t)((c)-44)<4 \|\| /* ,-./ */ \

	74 (c)==58 \|\| (c)==63 /* :? */ \

	75 )

	76

	77 #define inSetO(c) \

	78 ((uint8_t)((c)-33)<6 \|\| /* !"#$%& */ \

	79 (uint8_t)((c)-59)<4 \|\| /* ;<=> */ \

	80 (uint8_t)((c)-93)<4 \|\| /* ]^_` */ \

	81 (uint8_t)((c)-123)<3 \|\| /* {\|} */ \

	82 (c)==42 \|\| (c)==64 \|\| (c)==91 /* @[ / \

	83 )

	84

	85 #define isCRLFTAB(c) ((c)==13 \|\| (c)==10 \|\| (c)==9)

	86 #define isCRLFSPTAB(c) ((c)==32 \|\| (c)==13 \|\| (c)==10 \|\| (c)==9)

	87

	88 #define PLUS 43

	89 #define MINUS 45

	90 #define BACKSLASH 92

	91 #define TILDE 126

	92

	93 /* legal byte values: all US-ASCII graphic characters from space to before tilde , and CR LF TAB */

	94 #define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) \|\| isCRLFTAB(c) )

	95

	96 /* encode directly sets D and O and CR LF SP TAB */

	97 static const UBool encodeDirectlyMaximum[128]={

	98 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */

	99 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,

	100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	101

	102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,

	103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	104

	105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,

	107

	108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0

	110 };

	111

	112 /* encode directly set D and CR LF SP TAB but not set O */

	113 static const UBool encodeDirectlyRestricted[128]={

	114 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */

	115 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,

	116 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	117

	118 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,

	119 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,

	120

	121 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	122 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,

	123

	124 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0

	126 };

	127

	128 static const uint8_t

	129 toBase64[64]={

	130 /* A-Z */

	131 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,

	132 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,

	133 /* a-z */

	134 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,

	135 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,

	136 /* 0-9 */

	137 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,

	138 /* +/ */

	139 43, 47

	140 };

	141

	142 static const int8_t

	143 fromBase64[128]={

	144 /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */

	145 -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,

	146 -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,

	147

	148 /* general punctuation with + and / and a special value (-2) for - */

	149 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,

	150 /* digits */

	151 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,

	152

	153 /* A-Z */

	154 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,

	155 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,

	156

	157 /* a-z */

	158 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,

	159 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3

	160 };

	161

	162 /*

	163 * converter status values:

	164 *

	165 * toUnicodeStatus:

	166 * 24 inDirectMode (boolean)

	167 * 23..16 base64Counter (-1..7)

	168 * 15..0 bits (up to 14 bits incoming base64)

	169 *

	170 * fromUnicodeStatus:

	171 * 31..28 version (0: set O direct 1: set O escaped)

	172 * 24 inDirectMode (boolean)

	173 * 23..16 base64Counter (0..2)

	174 * 7..0 bits (6 bits outgoing base64)

	175 *

	176 */

	177

	178 static void

	179 _UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {

	180 if(choice<=UCNV_RESET_TO_UNICODE) {

	181 /* reset toUnicode */

	182 cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */

	183 cnv->toULength=0;

	184 }

	185 if(choice!=UCNV_RESET_TO_UNICODE) {

	186 /* reset fromUnicode */

	187 cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)\|0x1000000; /* keep version, inDirectMode=TRUE */

	188 }

	189 }

	190

	191 static void

	192 _UTF7Open(UConverter *cnv,

	193 UConverterLoadArgs *pArgs,

	194 UErrorCode *pErrorCode) {

	195 if(UCNV_GET_VERSION(cnv)<=1) {

	196 /* TODO(markus): Should just use cnv->options rather than copying the ve rsion number. */

	197 cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;

	198 _UTF7Reset(cnv, UCNV_RESET_BOTH);

	199 } else {

	200 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	201 }

	202 }

	203

	204 static void

	205 _UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,

	206 UErrorCode *pErrorCode) {

	207 UConverter *cnv;

	208 const uint8_t source, sourceLimit;

	209 UChar *target;

	210 const UChar *targetLimit;

	211 int32_t *offsets;

	212

	213 uint8_t *bytes;

	214 uint8_t byteIndex;

	215

	216 int32_t length, targetCapacity;

	217

	218 /* UTF-7 state */

	219 uint16_t bits;

	220 int8_t base64Counter;

	221 UBool inDirectMode;

	222

	223 int8_t base64Value;

	224

	225 int32_t sourceIndex, nextSourceIndex;

	226

	227 uint8_t b;

	228 /* set up the local pointers */

	229 cnv=pArgs->converter;

	230

	231 source=(const uint8_t *)pArgs->source;

	232 sourceLimit=(const uint8_t *)pArgs->sourceLimit;

	233 target=pArgs->target;

	234 targetLimit=pArgs->targetLimit;

	235 offsets=pArgs->offsets;

	236 /* get the state machine state */

	237 {

	238 uint32_t status=cnv->toUnicodeStatus;

	239 inDirectMode=(UBool)((status>>24)&1);

	240 base64Counter=(int8_t)(status>>16);

	241 bits=(uint16_t)status;

	242 }

	243 bytes=cnv->toUBytes;

	244 byteIndex=cnv->toULength;

	245

	246 /* sourceIndex=-1 if the current character began in the previous buffer */

	247 sourceIndex=byteIndex==0 ? 0 : -1;

	248 nextSourceIndex=0;

	249

	250 if(inDirectMode) {

	251 directMode:

	252 /*

	253 * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,

	254 * with their US-ASCII byte values.

	255 * Backslash and Tilde and most control characters are not allowed in UT F-7.

	256 * A plus sign starts Unicode (or "escape") Mode.

	257 *

	258 * In Direct Mode, only the sourceIndex is used.

	259 */

	260 byteIndex=0;

	261 length=(int32_t)(sourceLimit-source);

	262 targetCapacity=(int32_t)(targetLimit-target);

	263 if(length>targetCapacity) {

	264 length=targetCapacity;

	265 }

	266 while(length>0) {

	267 b=*source++;

	268 if(!isLegalUTF7(b)) {

	269 /* illegal */

	270 bytes[0]=b;

	271 byteIndex=1;

	272 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	273 break;

	274 } else if(b!=PLUS) {

	275 /* write directly encoded character */

	276 *target++=b;

	277 if(offsets!=NULL) {

	278 *offsets++=sourceIndex++;

	279 }

	280 } else /* PLUS */ {

	281 /* switch to Unicode mode */

	282 nextSourceIndex=++sourceIndex;

	283 inDirectMode=FALSE;

	284 byteIndex=0;

	285 bits=0;

	286 base64Counter=-1;

	287 goto unicodeMode;

	288 }

	289 --length;

	290 }

	291 if(source<sourceLimit && target>=targetLimit) {

	292 /* target is full */

	293 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	294 }

	295 } else {

	296 unicodeMode:

	297 /*

	298 * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.

	299 * The base64 sequence ends with any character that is not in the base64 alphabet.

	300 * A terminating minus sign is consumed.

	301 *

	302 * In Unicode Mode, the sourceIndex has the index to the start of the cu rrent

	303 * base64 bytes, while nextSourceIndex is precisely parallel to source,

	304 * keeping the index to the following byte.

	305 * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.

	306 */

	307 while(source<sourceLimit) {

	308 if(target<targetLimit) {

	309 bytes[byteIndex++]=b=*source++;

	310 ++nextSourceIndex;

	311 if(b>=126) {

	312 /* illegal - test other illegal US-ASCII values by base64Val ue==-3 */

	313 inDirectMode=TRUE;

	314 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	315 break;

	316 } else if((base64Value=fromBase64[b])>=0) {

	317 /* collect base64 bytes into UChars */

	318 switch(base64Counter) {

	319 case -1: /* -1 is immediately after the + */

	320 case 0:

	321 bits=base64Value;

	322 base64Counter=1;

	323 break;

	324 case 1:

	325 case 3:

	326 case 4:

	327 case 6:

	328 bits=(uint16_t)((bits<<6)\|base64Value);

	329 ++base64Counter;

	330 break;

	331 case 2:

	332 *target++=(UChar)((bits<<4)\|(base64Value>>2));

	333 if(offsets!=NULL) {

	334 *offsets++=sourceIndex;

	335 sourceIndex=nextSourceIndex-1;

	336 }

	337 bytes[0]=b; /* keep this byte in case an error occurs */

	338 byteIndex=1;

	339 bits=(uint16_t)(base64Value&3);

	340 base64Counter=3;

	341 break;

	342 case 5:

	343 *target++=(UChar)((bits<<2)\|(base64Value>>4));

	344 if(offsets!=NULL) {

	345 *offsets++=sourceIndex;

	346 sourceIndex=nextSourceIndex-1;

	347 }

	348 bytes[0]=b; /* keep this byte in case an error occurs */

	349 byteIndex=1;

	350 bits=(uint16_t)(base64Value&15);

	351 base64Counter=6;

	352 break;

	353 case 7:

	354 *target++=(UChar)((bits<<6)\|base64Value);

	355 if(offsets!=NULL) {

	356 *offsets++=sourceIndex;

	357 sourceIndex=nextSourceIndex;

	358 }

	359 byteIndex=0;

	360 bits=0;

	361 base64Counter=0;

	362 break;

	363 default:

	364 /* will never occur */

	365 break;

	366 }

	367 } else if(base64Value==-2) {

	368 /* minus sign terminates the base64 sequence */

	369 inDirectMode=TRUE;

	370 if(base64Counter==-1) {

	371 /* +- i.e. a minus immediately following a plus */

	372 *target++=PLUS;

	373 if(offsets!=NULL) {

	374 *offsets++=sourceIndex-1;

	375 }

	376 } else {

	377 /* absorb the minus and leave the Unicode Mode */

	378 if(bits!=0) {

	379 /* bits are illegally left over, a UChar is incomple te */

	380 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	381 break;

	382 }

	383 }

	384 sourceIndex=nextSourceIndex;

	385 goto directMode;

	386 } else if(base64Value==-1) /* for any legal character except bas e64 and minus sign */ {

	387 /* leave the Unicode Mode */

	388 inDirectMode=TRUE;

	389 if(base64Counter==-1) {

	390 /* illegal: + immediately followed by something other th an base64 or minus sign */

	391 /* include the plus sign in the reported sequence */

	392 --sourceIndex;

	393 bytes[0]=PLUS;

	394 bytes[1]=b;

	395 byteIndex=2;

	396 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	397 break;

	398 } else if(bits==0) {

	399 /* un-read the character in case it is a plus sign */

	400 --source;

	401 sourceIndex=nextSourceIndex-1;

	402 goto directMode;

	403 } else {

	404 /* bits are illegally left over, a UChar is incomplete * /

	405 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	406 break;

	407 }

	408 } else /* base64Value==-3 for illegal characters */ {

	409 /* illegal */

	410 inDirectMode=TRUE;

	411 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	412 break;

	413 }

	414 } else {

	415 /* target is full */

	416 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	417 break;

	418 }

	419 }

	420 }

	421

	422 if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {

	423 /*

	424 * if we are in Unicode mode, then the byteIndex might not be 0,

	425 * but that is ok if bits==0

	426 * -> we set byteIndex=0 at the end of the stream to avoid a truncated e rror

	427 * (not true for IMAP-mailbox-name where we must end in direct mode)

	428 */

	429 byteIndex=0;

	430 }

	431

	432 /* set the converter state back into UConverter */

	433 cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)\|((uint32_t)((uint8_t)base6 4Counter)<<16)\|(uint32_t)bits;

	434 cnv->toULength=byteIndex;

	435

	436 /* write back the updated pointers */

	437 pArgs->source=(const char *)source;

	438 pArgs->target=target;

	439 pArgs->offsets=offsets;

	440 return;

	441 }

	442

	443 static void

	444 _UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,

	445 UErrorCode *pErrorCode) {

	446 UConverter *cnv;

	447 const UChar source, sourceLimit;

	448 uint8_t target, targetLimit;

	449 int32_t *offsets;

	450

	451 int32_t length, targetCapacity, sourceIndex;

	452 UChar c;

	453

	454 /* UTF-7 state */

	455 const UBool *encodeDirectly;

	456 uint8_t bits;

	457 int8_t base64Counter;

	458 UBool inDirectMode;

	459

	460 /* set up the local pointers */

	461 cnv=pArgs->converter;

	462

	463 /* set up the local pointers */

	464 source=pArgs->source;

	465 sourceLimit=pArgs->sourceLimit;

	466 target=(uint8_t *)pArgs->target;

	467 targetLimit=(uint8_t *)pArgs->targetLimit;

	468 offsets=pArgs->offsets;

	469

	470 /* get the state machine state */

	471 {

	472 uint32_t status=cnv->fromUnicodeStatus;

	473 encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirect lyRestricted;

	474 inDirectMode=(UBool)((status>>24)&1);

	475 base64Counter=(int8_t)(status>>16);

	476 bits=(uint8_t)status;

	477 }

	478

	479 /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple s ourceIndex */

	480 sourceIndex=0;

	481

	482 if(inDirectMode) {

	483 directMode:

	484 length=(int32_t)(sourceLimit-source);

	485 targetCapacity=(int32_t)(targetLimit-target);

	486 if(length>targetCapacity) {

	487 length=targetCapacity;

	488 }

	489 while(length>0) {

	490 c=*source++;

	491 /* currently always encode CR LF SP TAB directly */

	492 if(c<=127 && encodeDirectly[c]) {

	493 /* encode directly */

	494 *target++=(uint8_t)c;

	495 if(offsets!=NULL) {

	496 *offsets++=sourceIndex++;

	497 }

	498 } else if(c==PLUS) {

	499 /* output +- for + */

	500 *target++=PLUS;

	501 if(target<targetLimit) {

	502 *target++=MINUS;

	503 if(offsets!=NULL) {

	504 *offsets++=sourceIndex;

	505 *offsets++=sourceIndex++;

	506 }

	507 /* realign length and targetCapacity */

	508 goto directMode;

	509 } else {

	510 if(offsets!=NULL) {

	511 *offsets++=sourceIndex++;

	512 }

	513 cnv->charErrorBuffer[0]=MINUS;

	514 cnv->charErrorBufferLength=1;

	515 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	516 break;

	517 }

	518 } else {

	519 /* un-read this character and switch to Unicode Mode */

	520 --source;

	521 *target++=PLUS;

	522 if(offsets!=NULL) {

	523 *offsets++=sourceIndex;

	524 }

	525 inDirectMode=FALSE;

	526 base64Counter=0;

	527 goto unicodeMode;

	528 }

	529 --length;

	530 }

	531 if(source<sourceLimit && target>=targetLimit) {

	532 /* target is full */

	533 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	534 }

	535 } else {

	536 unicodeMode:

	537 while(source<sourceLimit) {

	538 if(target<targetLimit) {

	539 c=*source++;

	540 if(c<=127 && encodeDirectly[c]) {

	541 /* encode directly */

	542 inDirectMode=TRUE;

	543

	544 /* trick: back out this character to make this easier */

	545 --source;

	546

	547 /* terminate the base64 sequence */

	548 if(base64Counter!=0) {

	549 /* write remaining bits for the previous character */

	550 *target++=toBase64[bits];

	551 if(offsets!=NULL) {

	552 *offsets++=sourceIndex-1;

	553 }

	554 }

	555 if(fromBase64[c]!=-1) {

	556 /* need to terminate with a minus */

	557 if(target<targetLimit) {

	558 *target++=MINUS;

	559 if(offsets!=NULL) {

	560 *offsets++=sourceIndex-1;

	561 }

	562 } else {

	563 cnv->charErrorBuffer[0]=MINUS;

	564 cnv->charErrorBufferLength=1;

	565 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	566 break;

	567 }

	568 }

	569 goto directMode;

	570 } else {

	571 /*

	572 * base64 this character:

	573 * Output 2 or 3 base64 bytes for the remaining bits of the previous character

	574 * and the bits of this character, each implicitly in UTF-16 BE.

	575 *

	576 * Here, bits is an 8-bit variable because only 6 bits need to be kept from one

	577 * character to the next. The actual 2 or 4 bits are shifted to the left edge

	578 * of the 6-bits field 5..0 to make the termination of the b ase64 sequence easier.

	579 */

	580 switch(base64Counter) {

	581 case 0:

	582 *target++=toBase64[c>>10];

	583 if(target<targetLimit) {

	584 *target++=toBase64[(c>>4)&0x3f];

	585 if(offsets!=NULL) {

	586 *offsets++=sourceIndex;

	587 *offsets++=sourceIndex++;

	588 }

	589 } else {

	590 if(offsets!=NULL) {

	591 *offsets++=sourceIndex++;

	592 }

	593 cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];

	594 cnv->charErrorBufferLength=1;

	595 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	596 }

	597 bits=(uint8_t)((c&15)<<2);

	598 base64Counter=1;

	599 break;

	600 case 1:

	601 *target++=toBase64[bits\|(c>>14)];

	602 if(target<targetLimit) {

	603 *target++=toBase64[(c>>8)&0x3f];

	604 if(target<targetLimit) {

	605 *target++=toBase64[(c>>2)&0x3f];

	606 if(offsets!=NULL) {

	607 *offsets++=sourceIndex;

	608 *offsets++=sourceIndex;

	609 *offsets++=sourceIndex++;

	610 }

	611 } else {

	612 if(offsets!=NULL) {

	613 *offsets++=sourceIndex;

	614 *offsets++=sourceIndex++;

	615 }

	616 cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];

	617 cnv->charErrorBufferLength=1;

	618 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	619 }

	620 } else {

	621 if(offsets!=NULL) {

	622 *offsets++=sourceIndex++;

	623 }

	624 cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];

	625 cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];

	626 cnv->charErrorBufferLength=2;

	627 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	628 }

	629 bits=(uint8_t)((c&3)<<4);

	630 base64Counter=2;

	631 break;

	632 case 2:

	633 *target++=toBase64[bits\|(c>>12)];

	634 if(target<targetLimit) {

	635 *target++=toBase64[(c>>6)&0x3f];

	636 if(target<targetLimit) {

	637 *target++=toBase64[c&0x3f];

	638 if(offsets!=NULL) {

	639 *offsets++=sourceIndex;

	640 *offsets++=sourceIndex;

	641 *offsets++=sourceIndex++;

	642 }

	643 } else {

	644 if(offsets!=NULL) {

	645 *offsets++=sourceIndex;

	646 *offsets++=sourceIndex++;

	647 }

	648 cnv->charErrorBuffer[0]=toBase64[c&0x3f];

	649 cnv->charErrorBufferLength=1;

	650 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	651 }

	652 } else {

	653 if(offsets!=NULL) {

	654 *offsets++=sourceIndex++;

	655 }

	656 cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];

	657 cnv->charErrorBuffer[1]=toBase64[c&0x3f];

	658 cnv->charErrorBufferLength=2;

	659 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	660 }

	661 bits=0;

	662 base64Counter=0;

	663 break;

	664 default:

	665 /* will never occur */

	666 break;

	667 }

	668 }

	669 } else {

	670 /* target is full */

	671 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	672 break;

	673 }

	674 }

	675 }

	676

	677 if(pArgs->flush && source>=sourceLimit) {

	678 /* flush remaining bits to the target */

	679 if(!inDirectMode && base64Counter!=0) {

	680 if(target<targetLimit) {

	681 *target++=toBase64[bits];

	682 if(offsets!=NULL) {

	683 *offsets++=sourceIndex-1;

	684 }

	685 } else {

	686 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits ];

	687 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	688 }

	689 }

	690 /* reset the state for the next conversion */

	691 cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)\|0x1000000; /* keep version, inDirectMode=TRUE */

	692 } else {

	693 /* set the converter state back into UConverter */

	694 cnv->fromUnicodeStatus=

	695 (cnv->fromUnicodeStatus&0xf0000000)\| /* keep version*/

	696 ((uint32_t)inDirectMode<<24)\|((uint32_t)base64Counter<<16)\|(uint32_t )bits;

	697 }

	698

	699 /* write back the updated pointers */

	700 pArgs->source=source;

	701 pArgs->target=(char *)target;

	702 pArgs->offsets=offsets;

	703 return;

	704 }

	705

	706 static const char *

	707 _UTF7GetName(const UConverter *cnv) {

	708 switch(cnv->fromUnicodeStatus>>28) {

	709 case 1:

	710 return "UTF-7,version=1";

	711 default:

	712 return "UTF-7";

	713 }

	714 }

	715

	716 static const UConverterImpl _UTF7Impl={

	717 UCNV_UTF7,

	718

	719 NULL,

	720 NULL,

	721

	722 _UTF7Open,

	723 NULL,

	724 _UTF7Reset,

	725

	726 _UTF7ToUnicodeWithOffsets,

	727 _UTF7ToUnicodeWithOffsets,

	728 _UTF7FromUnicodeWithOffsets,

	729 _UTF7FromUnicodeWithOffsets,

	730 NULL,

	731

	732 NULL,

	733 _UTF7GetName,

	734 NULL, /* we don't need writeSub() because we never call a callback at fromUn icode() */

	735 NULL,

	736 ucnv_getCompleteUnicodeSet

	737 };

	738

	739 static const UConverterStaticData _UTF7StaticData={

	740 sizeof(UConverterStaticData),

	741 "UTF-7",

	742 0, /* TODO CCSID for UTF-7 */

	743 UCNV_IBM, UCNV_UTF7,

	744 1, 4,

	745 { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */

	746 FALSE, FALSE,

	747 0,

	748 0,

	749 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

	750 };

	751

	752 const UConverterSharedData _UTF7Data={

	753 sizeof(UConverterSharedData), ~((uint32_t)0),

	754 NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,

	755 0

	756 };

	757

	758 /* IMAP mailbox name encoding ----------------------------------------------- */

	759

	760 /*

	761 * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1

	762 * http://www.ietf.org/rfc/rfc2060.txt

	763 *

	764 * 5.1.3. Mailbox International Naming Convention

	765 *

	766 * By convention, international mailbox names are specified using a

	767 * modified version of the UTF-7 encoding described in [UTF-7]. The

	768 * purpose of these modifications is to correct the following problems

	769 * with UTF-7:

	770 *

	771 * 1) UTF-7 uses the "+" character for shifting; this conflicts with

	772 * the common use of "+" in mailbox names, in particular USENET

	773 * newsgroup names.

	774 *

	775 * 2) UTF-7's encoding is BASE64 which uses the "/" character; this

	776 * conflicts with the use of "/" as a popular hierarchy delimiter.

	777 *

	778 * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with

	779 * the use of "\" as a popular hierarchy delimiter.

	780 *

	781 * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with

	782 * the use of "~" in some servers as a home directory indicator.

	783 *

	784 * 5) UTF-7 permits multiple alternate forms to represent the same

	785 * string; in particular, printable US-ASCII chararacters can be

	786 * represented in encoded form.

	787 *

	788 * In modified UTF-7, printable US-ASCII characters except for "&"

	789 * represent themselves; that is, characters with octet values 0x20-0x25

	790 * and 0x27-0x7e. The character "&" (0x26) is represented by the two-

	791 * octet sequence "&-".

	792 *

	793 * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all

	794 * Unicode 16-bit octets) are represented in modified BASE64, with a

	795 * further modification from [UTF-7] that "," is used instead of "/".

	796 * Modified BASE64 MUST NOT be used to represent any printing US-ASCII

	797 * character which can represent itself.

	798 *

	799 * "&" is used to shift to modified BASE64 and "-" to shift back to US-

	800 * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that

	801 * is, a name that ends with a Unicode 16-bit octet MUST end with a "-

	802 * ").

	803 *

	804 * For example, here is a mailbox name which mixes English, Japanese,

	805 * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-

	806 */

	807

	808 /*

	809 * Tests for US-ASCII characters belonging to character classes

	810 * defined in UTF-7.

	811 *

	812 * Set D (directly encoded characters) consists of the following

	813 * characters: the upper and lower case letters A through Z

	814 * and a through z, the 10 digits 0-9, and the following nine special

	815 * characters (note that "+" and "=" are omitted):

	816 * '(),-./:?

	817 *

	818 * Set O (optional direct characters) consists of the following

	819 * characters (note that "\" and "~" are omitted):

	820 * !"#$%&*;<=>@[]^_`{|}

	821 *

	822 * According to the rules in RFC 2152, the byte values for the following

	823 * US-ASCII characters are not used in UTF-7 and are therefore illegal:

	824 * - all C0 control codes except for CR LF TAB

	825 * - BACKSLASH

	826 * - TILDE

	827 * - DEL

	828 * - all codes beyond US-ASCII, i.e. all >127

	829 */

	830

	831 /* uses '&' not '+' to start a base64 sequence */

	832 #define AMPERSAND 0x26

	833 #define COMMA 0x2c

	834 #define SLASH 0x2f

	835

	836 /* legal byte values: all US-ASCII graphic characters 0x20..0x7e */

	837 #define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)

	838

	839 /* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */

	840 #define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND)

	841

	842 #define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA)

	843 #define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c])

	844

	845 /*

	846 * converter status values:

	847 *

	848 * toUnicodeStatus:

	849 * 24 inDirectMode (boolean)

	850 * 23..16 base64Counter (-1..7)

	851 * 15..0 bits (up to 14 bits incoming base64)

	852 *

	853 * fromUnicodeStatus:

	854 * 24 inDirectMode (boolean)

	855 * 23..16 base64Counter (0..2)

	856 * 7..0 bits (6 bits outgoing base64)

	857 *

	858 * ignore bits 31..25

	859 */

	860

	861 static void

	862 _IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,

	863 UErrorCode *pErrorCode) {

	864 UConverter *cnv;

	865 const uint8_t source, sourceLimit;

	866 UChar *target;

	867 const UChar *targetLimit;

	868 int32_t *offsets;

	869

	870 uint8_t *bytes;

	871 uint8_t byteIndex;

	872

	873 int32_t length, targetCapacity;

	874

	875 /* UTF-7 state */

	876 uint16_t bits;

	877 int8_t base64Counter;

	878 UBool inDirectMode;

	879

	880 int8_t base64Value;

	881

	882 int32_t sourceIndex, nextSourceIndex;

	883

	884 UChar c;

	885 uint8_t b;

	886

	887 /* set up the local pointers */

	888 cnv=pArgs->converter;

	889

	890 source=(const uint8_t *)pArgs->source;

	891 sourceLimit=(const uint8_t *)pArgs->sourceLimit;

	892 target=pArgs->target;

	893 targetLimit=pArgs->targetLimit;

	894 offsets=pArgs->offsets;

	895 /* get the state machine state */

	896 {

	897 uint32_t status=cnv->toUnicodeStatus;

	898 inDirectMode=(UBool)((status>>24)&1);

	899 base64Counter=(int8_t)(status>>16);

	900 bits=(uint16_t)status;

	901 }

	902 bytes=cnv->toUBytes;

	903 byteIndex=cnv->toULength;

	904

	905 /* sourceIndex=-1 if the current character began in the previous buffer */

	906 sourceIndex=byteIndex==0 ? 0 : -1;

	907 nextSourceIndex=0;

	908

	909 if(inDirectMode) {

	910 directMode:

	911 /*

	912 * In Direct Mode, US-ASCII characters are encoded directly, i.e.,

	913 * with their US-ASCII byte values.

	914 * An ampersand starts Unicode (or "escape") Mode.

	915 *

	916 * In Direct Mode, only the sourceIndex is used.

	917 */

	918 byteIndex=0;

	919 length=(int32_t)(sourceLimit-source);

	920 targetCapacity=(int32_t)(targetLimit-target);

	921 if(length>targetCapacity) {

	922 length=targetCapacity;

	923 }

	924 while(length>0) {

	925 b=*source++;

	926 if(!isLegalIMAP(b)) {

	927 /* illegal */

	928 bytes[0]=b;

	929 byteIndex=1;

	930 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	931 break;

	932 } else if(b!=AMPERSAND) {

	933 /* write directly encoded character */

	934 *target++=b;

	935 if(offsets!=NULL) {

	936 *offsets++=sourceIndex++;

	937 }

	938 } else /* AMPERSAND */ {

	939 /* switch to Unicode mode */

	940 nextSourceIndex=++sourceIndex;

	941 inDirectMode=FALSE;

	942 byteIndex=0;

	943 bits=0;

	944 base64Counter=-1;

	945 goto unicodeMode;

	946 }

	947 --length;

	948 }

	949 if(source<sourceLimit && target>=targetLimit) {

	950 /* target is full */

	951 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	952 }

	953 } else {

	954 unicodeMode:

	955 /*

	956 * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.

	957 * The base64 sequence ends with any character that is not in the base64 alphabet.

	958 * A terminating minus sign is consumed.

	959 * US-ASCII must not be base64-ed.

	960 *

	961 * In Unicode Mode, the sourceIndex has the index to the start of the cu rrent

	962 * base64 bytes, while nextSourceIndex is precisely parallel to source,

	963 * keeping the index to the following byte.

	964 * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.

	965 */

	966 while(source<sourceLimit) {

	967 if(target<targetLimit) {

	968 bytes[byteIndex++]=b=*source++;

	969 ++nextSourceIndex;

	970 if(b>0x7e) {

	971 /* illegal - test other illegal US-ASCII values by base64Val ue==-3 */

	972 inDirectMode=TRUE;

	973 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	974 break;

	975 } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {

	976 /* collect base64 bytes into UChars */

	977 switch(base64Counter) {

	978 case -1: /* -1 is immediately after the & */

	979 case 0:

	980 bits=base64Value;

	981 base64Counter=1;

	982 break;

	983 case 1:

	984 case 3:

	985 case 4:

	986 case 6:

	987 bits=(uint16_t)((bits<<6)\|base64Value);

	988 ++base64Counter;

	989 break;

	990 case 2:

	991 c=(UChar)((bits<<4)\|(base64Value>>2));

	992 if(isLegalIMAP(c)) {

	993 /* illegal */

	994 inDirectMode=TRUE;

	995 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	996 goto endloop;

	997 }

	998 *target++=c;

	999 if(offsets!=NULL) {

	1000 *offsets++=sourceIndex;

	1001 sourceIndex=nextSourceIndex-1;

	1002 }

	1003 bytes[0]=b; /* keep this byte in case an error occurs */

	1004 byteIndex=1;

	1005 bits=(uint16_t)(base64Value&3);

	1006 base64Counter=3;

	1007 break;

	1008 case 5:

	1009 c=(UChar)((bits<<2)\|(base64Value>>4));

	1010 if(isLegalIMAP(c)) {

	1011 /* illegal */

	1012 inDirectMode=TRUE;

	1013 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	1014 goto endloop;

	1015 }

	1016 *target++=c;

	1017 if(offsets!=NULL) {

	1018 *offsets++=sourceIndex;

	1019 sourceIndex=nextSourceIndex-1;

	1020 }

	1021 bytes[0]=b; /* keep this byte in case an error occurs */

	1022 byteIndex=1;

	1023 bits=(uint16_t)(base64Value&15);

	1024 base64Counter=6;

	1025 break;

	1026 case 7:

	1027 c=(UChar)((bits<<6)\|base64Value);

	1028 if(isLegalIMAP(c)) {

	1029 /* illegal */

	1030 inDirectMode=TRUE;

	1031 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	1032 goto endloop;

	1033 }

	1034 *target++=c;

	1035 if(offsets!=NULL) {

	1036 *offsets++=sourceIndex;

	1037 sourceIndex=nextSourceIndex;

	1038 }

	1039 byteIndex=0;

	1040 bits=0;

	1041 base64Counter=0;

	1042 break;

	1043 default:

	1044 /* will never occur */

	1045 break;

	1046 }

	1047 } else if(base64Value==-2) {

	1048 /* minus sign terminates the base64 sequence */

	1049 inDirectMode=TRUE;

	1050 if(base64Counter==-1) {

	1051 /* &- i.e. a minus immediately following an ampersand */

	1052 *target++=AMPERSAND;

	1053 if(offsets!=NULL) {

	1054 *offsets++=sourceIndex-1;

	1055 }

	1056 } else {

	1057 /* absorb the minus and leave the Unicode Mode */

	1058 if(bits!=0 \|\| (base64Counter!=0 && base64Counter!=3 && b ase64Counter!=6)) {

	1059 /* bits are illegally left over, a UChar is incomplete */

	1060 /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */

	1061 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	1062 break;

	1063 }

	1064 }

	1065 sourceIndex=nextSourceIndex;

	1066 goto directMode;

	1067 } else {

	1068 if(base64Counter==-1) {

	1069 /* illegal: & immediately followed by something other than base64 or minus sign */

	1070 /* include the ampersand in the reported sequence */

	1071 --sourceIndex;

	1072 bytes[0]=AMPERSAND;

	1073 bytes[1]=b;

	1074 byteIndex=2;

	1075 }

	1076 /* base64Value==-1 for characters that are illegal only in Unicode mode */

	1077 /* base64Value==-3 for illegal characters */

	1078 /* illegal */

	1079 inDirectMode=TRUE;

	1080 *pErrorCode=U_ILLEGAL_CHAR_FOUND;

	1081 break;

	1082 }

	1083 } else {

	1084 /* target is full */

	1085 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1086 break;

	1087 }

	1088 }

	1089 }

	1090 endloop:

	1091

	1092 /*

	1093 * the end of the input stream and detection of truncated input

	1094 * are handled by the framework, but here we must check if we are in Unicode

	1095 * mode and byteIndex==0 because we must end in direct mode

	1096 *

	1097 * conditions:

	1098 * successful

	1099 * in Unicode mode and byteIndex==0

	1100 * end of input and no truncated input

	1101 */

	1102 if( U_SUCCESS(*pErrorCode) &&

	1103 !inDirectMode && byteIndex==0 &&

	1104 pArgs->flush && source>=sourceLimit

	1105 ) {

	1106 if(base64Counter==-1) {

	1107 /* & at the very end of the input */

	1108 /* make the ampersand the reported sequence */

	1109 bytes[0]=AMPERSAND;

	1110 byteIndex=1;

	1111 }

	1112 /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */

	1113

	1114 inDirectMode=TRUE; /* avoid looping */

	1115 *pErrorCode=U_TRUNCATED_CHAR_FOUND;

	1116 }

	1117

	1118 /* set the converter state back into UConverter */

	1119 cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)\|((uint32_t)((uint8_t)base6 4Counter)<<16)\|(uint32_t)bits;

	1120 cnv->toULength=byteIndex;

	1121

	1122 /* write back the updated pointers */

	1123 pArgs->source=(const char *)source;

	1124 pArgs->target=target;

	1125 pArgs->offsets=offsets;

	1126 return;

	1127 }

	1128

	1129 static void

	1130 _IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,

	1131 UErrorCode *pErrorCode) {

	1132 UConverter *cnv;

	1133 const UChar source, sourceLimit;

	1134 uint8_t target, targetLimit;

	1135 int32_t *offsets;

	1136

	1137 int32_t length, targetCapacity, sourceIndex;

	1138 UChar c;

	1139 uint8_t b;

	1140

	1141 /* UTF-7 state */

	1142 uint8_t bits;

	1143 int8_t base64Counter;

	1144 UBool inDirectMode;

	1145

	1146 /* set up the local pointers */

	1147 cnv=pArgs->converter;

	1148

	1149 /* set up the local pointers */

	1150 source=pArgs->source;

	1151 sourceLimit=pArgs->sourceLimit;

	1152 target=(uint8_t *)pArgs->target;

	1153 targetLimit=(uint8_t *)pArgs->targetLimit;

	1154 offsets=pArgs->offsets;

	1155

	1156 /* get the state machine state */

	1157 {

	1158 uint32_t status=cnv->fromUnicodeStatus;

	1159 inDirectMode=(UBool)((status>>24)&1);

	1160 base64Counter=(int8_t)(status>>16);

	1161 bits=(uint8_t)status;

	1162 }

	1163

	1164 /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple s ourceIndex */

	1165 sourceIndex=0;

	1166

	1167 if(inDirectMode) {

	1168 directMode:

	1169 length=(int32_t)(sourceLimit-source);

	1170 targetCapacity=(int32_t)(targetLimit-target);

	1171 if(length>targetCapacity) {

	1172 length=targetCapacity;

	1173 }

	1174 while(length>0) {

	1175 c=*source++;

	1176 /* encode 0x20..0x7e except '&' directly */

	1177 if(inSetDIMAP(c)) {

	1178 /* encode directly */

	1179 *target++=(uint8_t)c;

	1180 if(offsets!=NULL) {

	1181 *offsets++=sourceIndex++;

	1182 }

	1183 } else if(c==AMPERSAND) {

	1184 /* output &- for & */

	1185 *target++=AMPERSAND;

	1186 if(target<targetLimit) {

	1187 *target++=MINUS;

	1188 if(offsets!=NULL) {

	1189 *offsets++=sourceIndex;

	1190 *offsets++=sourceIndex++;

	1191 }

	1192 /* realign length and targetCapacity */

	1193 goto directMode;

	1194 } else {

	1195 if(offsets!=NULL) {

	1196 *offsets++=sourceIndex++;

	1197 }

	1198 cnv->charErrorBuffer[0]=MINUS;

	1199 cnv->charErrorBufferLength=1;

	1200 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1201 break;

	1202 }

	1203 } else {

	1204 /* un-read this character and switch to Unicode Mode */

	1205 --source;

	1206 *target++=AMPERSAND;

	1207 if(offsets!=NULL) {

	1208 *offsets++=sourceIndex;

	1209 }

	1210 inDirectMode=FALSE;

	1211 base64Counter=0;

	1212 goto unicodeMode;

	1213 }

	1214 --length;

	1215 }

	1216 if(source<sourceLimit && target>=targetLimit) {

	1217 /* target is full */

	1218 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1219 }

	1220 } else {

	1221 unicodeMode:

	1222 while(source<sourceLimit) {

	1223 if(target<targetLimit) {

	1224 c=*source++;

	1225 if(isLegalIMAP(c)) {

	1226 /* encode directly */

	1227 inDirectMode=TRUE;

	1228

	1229 /* trick: back out this character to make this easier */

	1230 --source;

	1231

	1232 /* terminate the base64 sequence */

	1233 if(base64Counter!=0) {

	1234 /* write remaining bits for the previous character */

	1235 *target++=TO_BASE64_IMAP(bits);

	1236 if(offsets!=NULL) {

	1237 *offsets++=sourceIndex-1;

	1238 }

	1239 }

	1240 /* need to terminate with a minus */

	1241 if(target<targetLimit) {

	1242 *target++=MINUS;

	1243 if(offsets!=NULL) {

	1244 *offsets++=sourceIndex-1;

	1245 }

	1246 } else {

	1247 cnv->charErrorBuffer[0]=MINUS;

	1248 cnv->charErrorBufferLength=1;

	1249 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1250 break;

	1251 }

	1252 goto directMode;

	1253 } else {

	1254 /*

	1255 * base64 this character:

	1256 * Output 2 or 3 base64 bytes for the remaining bits of the previous character

	1257 * and the bits of this character, each implicitly in UTF-16 BE.

	1258 *

	1259 * Here, bits is an 8-bit variable because only 6 bits need to be kept from one

	1260 * character to the next. The actual 2 or 4 bits are shifted to the left edge

	1261 * of the 6-bits field 5..0 to make the termination of the b ase64 sequence easier.

	1262 */

	1263 switch(base64Counter) {

	1264 case 0:

	1265 b=(uint8_t)(c>>10);

	1266 *target++=TO_BASE64_IMAP(b);

	1267 if(target<targetLimit) {

	1268 b=(uint8_t)((c>>4)&0x3f);

	1269 *target++=TO_BASE64_IMAP(b);

	1270 if(offsets!=NULL) {

	1271 *offsets++=sourceIndex;

	1272 *offsets++=sourceIndex++;

	1273 }

	1274 } else {

	1275 if(offsets!=NULL) {

	1276 *offsets++=sourceIndex++;

	1277 }

	1278 b=(uint8_t)((c>>4)&0x3f);

	1279 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);

	1280 cnv->charErrorBufferLength=1;

	1281 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1282 }

	1283 bits=(uint8_t)((c&15)<<2);

	1284 base64Counter=1;

	1285 break;

	1286 case 1:

	1287 b=(uint8_t)(bits\|(c>>14));

	1288 *target++=TO_BASE64_IMAP(b);

	1289 if(target<targetLimit) {

	1290 b=(uint8_t)((c>>8)&0x3f);

	1291 *target++=TO_BASE64_IMAP(b);

	1292 if(target<targetLimit) {

	1293 b=(uint8_t)((c>>2)&0x3f);

	1294 *target++=TO_BASE64_IMAP(b);

	1295 if(offsets!=NULL) {

	1296 *offsets++=sourceIndex;

	1297 *offsets++=sourceIndex;

	1298 *offsets++=sourceIndex++;

	1299 }

	1300 } else {

	1301 if(offsets!=NULL) {

	1302 *offsets++=sourceIndex;

	1303 *offsets++=sourceIndex++;

	1304 }

	1305 b=(uint8_t)((c>>2)&0x3f);

	1306 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);

	1307 cnv->charErrorBufferLength=1;

	1308 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1309 }

	1310 } else {

	1311 if(offsets!=NULL) {

	1312 *offsets++=sourceIndex++;

	1313 }

	1314 b=(uint8_t)((c>>8)&0x3f);

	1315 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);

	1316 b=(uint8_t)((c>>2)&0x3f);

	1317 cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);

	1318 cnv->charErrorBufferLength=2;

	1319 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1320 }

	1321 bits=(uint8_t)((c&3)<<4);

	1322 base64Counter=2;

	1323 break;

	1324 case 2:

	1325 b=(uint8_t)(bits\|(c>>12));

	1326 *target++=TO_BASE64_IMAP(b);

	1327 if(target<targetLimit) {

	1328 b=(uint8_t)((c>>6)&0x3f);

	1329 *target++=TO_BASE64_IMAP(b);

	1330 if(target<targetLimit) {

	1331 b=(uint8_t)(c&0x3f);

	1332 *target++=TO_BASE64_IMAP(b);

	1333 if(offsets!=NULL) {

	1334 *offsets++=sourceIndex;

	1335 *offsets++=sourceIndex;

	1336 *offsets++=sourceIndex++;

	1337 }

	1338 } else {

	1339 if(offsets!=NULL) {

	1340 *offsets++=sourceIndex;

	1341 *offsets++=sourceIndex++;

	1342 }

	1343 b=(uint8_t)(c&0x3f);

	1344 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);

	1345 cnv->charErrorBufferLength=1;

	1346 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1347 }

	1348 } else {

	1349 if(offsets!=NULL) {

	1350 *offsets++=sourceIndex++;

	1351 }

	1352 b=(uint8_t)((c>>6)&0x3f);

	1353 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);

	1354 b=(uint8_t)(c&0x3f);

	1355 cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);

	1356 cnv->charErrorBufferLength=2;

	1357 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1358 }

	1359 bits=0;

	1360 base64Counter=0;

	1361 break;

	1362 default:

	1363 /* will never occur */

	1364 break;

	1365 }

	1366 }

	1367 } else {

	1368 /* target is full */

	1369 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1370 break;

	1371 }

	1372 }

	1373 }

	1374

	1375 if(pArgs->flush && source>=sourceLimit) {

	1376 /* flush remaining bits to the target */

	1377 if(!inDirectMode) {

	1378 if(base64Counter!=0) {

	1379 if(target<targetLimit) {

	1380 *target++=TO_BASE64_IMAP(bits);

	1381 if(offsets!=NULL) {

	1382 *offsets++=sourceIndex-1;

	1383 }

	1384 } else {

	1385 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64 _IMAP(bits);

	1386 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1387 }

	1388 }

	1389 /* need to terminate with a minus */

	1390 if(target<targetLimit) {

	1391 *target++=MINUS;

	1392 if(offsets!=NULL) {

	1393 *offsets++=sourceIndex-1;

	1394 }

	1395 } else {

	1396 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;

	1397 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

	1398 }

	1399 }

	1400 /* reset the state for the next conversion */

	1401 cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)\|0x1000000; /* keep version, inDirectMode=TRUE */

	1402 } else {

	1403 /* set the converter state back into UConverter */

	1404 cnv->fromUnicodeStatus=

	1405 (cnv->fromUnicodeStatus&0xf0000000)\| /* keep version*/

	1406 ((uint32_t)inDirectMode<<24)\|((uint32_t)base64Counter<<16)\|(uint32_t )bits;

	1407 }

	1408

	1409 /* write back the updated pointers */

	1410 pArgs->source=source;

	1411 pArgs->target=(char *)target;

	1412 pArgs->offsets=offsets;

	1413 return;

	1414 }

	1415

	/*
	 * Function table for the IMAP-mailbox-name converter.
	 * The entries are positional; which slot means what is defined by the
	 * UConverterImpl declaration, which is not part of this file.
	 * Visible here: the UTF-7 open and reset functions are reused, and each of
	 * the two IMAP conversion functions fills two consecutive slots.
	 */
	1416 static const UConverterImpl _IMAPImpl={

	1417 UCNV_IMAP_MAILBOX,

	1418

	1419 NULL,

	1420 NULL,

	1421

	1422 _UTF7Open,

	1423 NULL,

	1424 _UTF7Reset,

	1425

	1426 _IMAPToUnicodeWithOffsets,

	1427 _IMAPToUnicodeWithOffsets,

	1428 _IMAPFromUnicodeWithOffsets,

	1429 _IMAPFromUnicodeWithOffsets,

	1430 NULL,

	1431

	1432 NULL,

	1433 NULL,

	1434 NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */

	1435 NULL,

	1436 ucnv_getCompleteUnicodeSet

	1437 };

	1438

	/*
	 * Static descriptor of the converter named "IMAP-mailbox-name".
	 * The initializer is positional; the field order comes from the
	 * UConverterStaticData declaration, which is not part of this file.
	 * NOTE(review): "1, 4" is presumably the minimum/maximum number of bytes
	 * per character - confirm against the struct declaration.
	 */
	1439 static const UConverterStaticData _IMAPStaticData={

	1440 sizeof(UConverterStaticData),

	1441 "IMAP-mailbox-name",

	1442 0, /* TODO CCSID for IMAP-mailbox-name */

	1443 UCNV_IBM, UCNV_IMAP_MAILBOX,

	1444 1, 4,

	1445 { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */

	1446 FALSE, FALSE,

	1447 0,

	1448 0,

	1449 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

	1450 };

	1451

	/*
	 * Shared-data record for the IMAP-mailbox-name converter: it points at
	 * _IMAPStaticData and _IMAPImpl defined above.
	 * Unlike those two it is not static, so it has external linkage.
	 * The remaining positional fields follow the UConverterSharedData
	 * declaration, which is not part of this file.
	 */
	1452 const UConverterSharedData _IMAPData={

	1453 sizeof(UConverterSharedData), ~((uint32_t)0),

	1454 NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,

	1455 0

	1456 };

	1457

	1458 #endif

OLD	NEW

« no previous file with comments | « icu46/source/common/ucnv_u32.c ('k') | icu46/source/common/ucnv_u8.c » ('j') | no next file with comments »