icu46/source/common/ucnv2022.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/ucnv2022.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 2000-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * file name: ucnv2022.c

	7 * encoding: US-ASCII

	8 * tab size: 8 (not used)

	9 * indentation:4

	10 *

	11 * created on: 2000feb03

	12 * created by: Markus W. Scherer

	13 *

	14 * Change history:

	15 *

	16 * 06/29/2000 helena Major rewrite of the callback APIs.

	17 * 08/08/2000 Ram Included support for ISO-2022-JP-2

	18 * Changed implementation of toUnicode

	19 * function

	20 * 08/21/2000 Ram Added support for ISO-2022-KR

	21 * 08/29/2000 Ram Separated implementation of EBCDIC to

	22 * ucnvebdc.c

	23 * 09/20/2000 Ram Added support for ISO-2022-CN

	24 * Added implementations for getNextUChar()

	25 * for specific 2022 country variants.

	26 * 10/31/2000 Ram Implemented offsets logic functions

	27 */

	28

	29 #include "unicode/utypes.h"

	30

	31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

	32

	33 #include "unicode/ucnv.h"

	34 #include "unicode/uset.h"

	35 #include "unicode/ucnv_err.h"

	36 #include "unicode/ucnv_cb.h"

	37 #include "ucnv_imp.h"

	38 #include "ucnv_bld.h"

	39 #include "ucnv_cnv.h"

	40 #include "ucnvmbcs.h"

	41 #include "cstring.h"

	42 #include "cmemory.h"

	43

	/* Number of elements in a true array (not a pointer), as int32_t; the whole expansion is parenthesized so it composes safely in larger expressions. */
	#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

	45

	46 #ifdef U_ENABLE_GENERIC_ISO_2022

	47 /*

	48 * I am disabling the generic ISO-2022 converter after proposing to do so on

	49 * the icu mailing list two days ago.

	50 *

	51 * Reasons:

	52 * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of

	53 * its designation sequences, single shifts with return to the previous state ,

	54 * switch-with-no-return to UTF-16BE or similar, etc.

	55 * This is unlike the language-specific variants like ISO-2022-JP which

	56 * require a much smaller repertoire of ISO-2022 features.

	57 * These variants continue to be supported.

	58 * 2. I believe that no one is really using the generic ISO-2022 converter

	59 * but rather always one of the language-specific variants.

	60 * Note that ICU's generic ISO-2022 converter has always output one escape

	61 * sequence followed by UTF-8 for the whole stream.

	62 * 3. Switching between subcharsets is extremely slow, because each time

	63 * the previous converter is closed and a new one opened,

	64 * without any kind of caching, least-recently-used list, etc.

	65 * 4. The code is currently buggy, and given the above it does not seem

	66 * reasonable to spend the time on maintenance.

	67 * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.

	68 * This means, for example, that when ISO-8859-7 is designated, the following

	69 * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.

	70 * The ICU ISO-2022 converter does not handle this - and has no information

	71 * about which subconverter would have to be shifted vs. which is designed

	72 * for 7-bit ISO-2022.

	73 *

	74 * Markus Scherer 2003-dec-03

	75 */

	76 #endif

	77

	/* One-byte strings holding the SI (0x0F) and SO (0x0E) control bytes. */
	static const char SHIFT_IN_STR[] = "\x0F";
	static const char SHIFT_OUT_STR[] = "\x0E";

	/* C0 control and space code values. */
	#define CR 0x0D
	#define LF 0x0A
	#define H_TAB 0x09
	#define V_TAB 0x0B
	#define SPACE 0x20

	/* First and last code points of the half-width Katakana range. */
	enum {
	    HWKANA_START=0xff61,
	    HWKANA_END=0xff9f
	};

	91

	/*
	 * 94-character sets with native byte values A1..FE are encoded in ISO 2022
	 * as bytes 21..7E. (Subtract 0x80.)
	 * 96-character sets with native byte values A0..FF are encoded in ISO 2022
	 * as bytes 20..7F. (Subtract 0x80.)
	 * Do not encode C1 control codes with native bytes 80..9F
	 * as bytes 00..1F (C0 control codes).
	 */
	enum {
	    GR94_START=0xa1,
	    GR94_END=0xfe,
	    GR96_START=0xa0,
	    GR96_END=0xff
	};

	106

	/*
	 * ISO 2022 control codes must not be converted from Unicode
	 * because they would mess up the byte stream.
	 * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
	 * corresponding to SO, SI, and ESC.
	 *
	 * The range check compares the argument as uint32_t so that a negative
	 * value is rejected before it can be used as a shift count.
	 * The argument is evaluated twice; it must not have side effects.
	 */
	#define IS_2022_CONTROL(c) (((uint32_t)(c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)

	114

	115 /* for ISO-2022-JP and -CN implementations */

	/*
	 * Charset/state numbers for the ISO-2022-JP and -CN implementations.
	 * The JP and CN groups deliberately reuse the small values 1..3.
	 */
	typedef enum {
	    /* shared values */
	    INVALID_STATE=-1,
	    ASCII = 0,

	    SS2_STATE=0x10,
	    SS3_STATE,

	    /* JP */
	    ISO8859_1 = 1 ,
	    ISO8859_7 = 2 ,
	    JISX201 = 3,
	    JISX208 = 4,
	    JISX212 = 5,
	    GB2312 =6,
	    KSC5601 =7,
	    HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */

	    /* CN */
	    /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
	    GB2312_1=1,
	    ISO_IR_165=2,
	    CNS_11643=3,

	    /*
	     * these are used in StateEnum and ISO2022State variables,
	     * but CNS_11643 must be used to index into myConverterArray[]
	     */
	    CNS_11643_0=0x20,
	    CNS_11643_1,
	    CNS_11643_2,
	    CNS_11643_3,
	    CNS_11643_4,
	    CNS_11643_5,
	    CNS_11643_6,
	    CNS_11643_7
	} StateEnum;

	153

	154 /* is the StateEnum charset value for a DBCS charset? */

	155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)

	156

	157 #define CSM(cs) ((uint16_t)1<<(cs))

	158

	159 /*

	160 * Each of these charset masks (with index x) contains a bit for a charset in ex act correspondence

	161 * to whether that charset is used in the corresponding version x of ISO_2022,lo cale=ja,version=x

	162 *

	163 * Note: The converter uses some leniency:

	164 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in

	165 * all versions, not just JIS7 and JIS8.

	166 * - ICU does not distinguish between different versions of JIS X 0208.

	167 */

	168 enum { MAX_JA_VERSION=4 };

	169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={

	170 CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT),

	171 CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212),

	172 CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB231 2)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7),

	173 CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB231 2)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7),

	174 CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB231 2)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7)

	175 };

	176

	/* Converter type tag; stored in UConverterDataISO2022.currentType. */
	typedef enum {
	    ASCII1=0,
	    LATIN1,
	    SBCS,
	    DBCS,
	    MBCS,
	    HWKANA
	}Cnv2022Type;

	185

	/* Designation and shift state for one conversion direction. */
	typedef struct ISO2022State {
	    int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
	    int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
	    int8_t prevG;       /* g before single shift (SS2 or SS3) */
	} ISO2022State;

	191

	192 #define UCNV_OPTIONS_VERSION_MASK 0xf

	193 #define UCNV_2022_MAX_CONVERTERS 10

	194

	195 typedef struct{

	196 UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];

	197 UConverter *currentConverter;

	198 Cnv2022Type currentType;

	199 ISO2022State toU2022State, fromU2022State;

	200 uint32_t key;

	201 uint32_t version;

	202 #ifdef U_ENABLE_GENERIC_ISO_2022

	203 UBool isFirstBuffer;

	204 #endif

	205 UBool isEmptySegment;

	206 char name[30];

	207 char locale[3];

	208 }UConverterDataISO2022;

	209

	210 /* Protos */

	211 /* ISO-2022 ----------------------------------------------------------------- */

	212

	213 /Forward declaration /

	214 U_CFUNC void

	215 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,

	216 UErrorCode * err);

	217 U_CFUNC void

	218 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,

	219 UErrorCode * err);

	220

	221 #define ESC_2022 0x1B /ESC/

	222

	/* Result of matching the bytes seen so far against the escape sequence tables. */
	typedef enum
	{
	    INVALID_2022 = -1,              /* Doesn't correspond to a valid iso 2022 escape sequence */
	    VALID_NON_TERMINAL_2022 = 0,    /* so far corresponds to a valid iso 2022 escape sequence */
	    VALID_TERMINAL_2022 = 1,        /* corresponds to a valid iso 2022 escape sequence */
	    VALID_MAYBE_TERMINAL_2022 = 2   /* so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence */
	} UCNV_TableStates_2022;

	230

	231 /*

	232 * The way these state transition arrays work is:

	233 * ex : ESC$B is the sequence for JISX208

	234 * a) First Iteration: char is ESC

	235 * i) Get the value of ESC from normalize_esq_chars_2022[] with int valu e of ESC as index

	236 * int x = normalize_esq_chars_2022[27] which is equal to 1

	237 * ii) Search for this value in escSeqStateTable_Key_2022[]

	238 * value of x is stored at escSeqStateTable_Key_2022[0]

	239 * iii) Save this index as offset

	240 * iv) Get state of this sequence from escSeqStateTable_Value_2022[]

	241 * escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2 022

	242 * b) Switch on this state and continue to next char

	243 * i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index

	244 * which is normalize_esq_chars_2022[36] == 4

	245 * ii) x is currently 1(from above)

	246 * x<<=5 -- x is now 32

	247 * x+=normalize_esq_chars_2022[36]

	248 * now x is 36

	249 * iii) Search for this value in escSeqStateTable_Key_2022[]

	250 * value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2

	251 * iv) Get state of this sequence from escSeqStateTable_Value_2022[]

	252 * escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2 022

	253 * c) Switch on this state and continue to next char

	254 * i) Get the value of B from normalize_esq_chars_2022[] with int value o f B as index

	255 * ii) x is currently 36 (from above)

	256 * x<<=5 -- x is now 1152

	257 * x+=normalize_esq_chars_2022[66]

	258 * now x is 1161

	259 * iii) Search for this value in escSeqStateTable_Key_2022[]

	260 * value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24

	261 * iv) Get state of this sequence from escSeqStateTable_Value_2022[24]

	262 * escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022

	263 * v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208

	264 */

	265

	266

	/* Below are the 3 arrays depicting a state transition table */
	/*
	 * Maps each byte value to a small code (1..29) used to build escape sequence
	 * keys; 0 means the byte cannot occur anywhere in an escape sequence.
	 * Non-zero entries: ESC=1, '$'=4, '%'=7, '&'=29, '('=2, ')'=24, '*'=26,
	 * '+'=27, '-'=3, '.'=23, '/'=6, '@'=5, 'A'..'M'=8..20, 'N'=25, 'O'=28,
	 * 'R'=21, 'Z'=22.
	 */
	static const int8_t normalize_esq_chars_2022[256] = {
	/*       0      1       2       3       4      5       6        7       8       9           */

	         0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29     ,0
	        ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
	        ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
	        ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
	        ,0     ,0      ,0      ,0      ,0      ,0
	};

	298

	299 #ifdef U_ENABLE_GENERIC_ISO_2022

	300 /*

	301 * When the generic ISO-2022 converter is completely removed, not just disabled

	302 * per #ifdef, then the following state table and the associated tables that are

	303 * dimensioned with MAX_STATES_2022 should be trimmed.

	304 *

	305 * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of

	306 * the associated escape sequences starting with ESC ( B should be removed.

	307 * This includes the ones with key values 1097 and all of the ones above 1000000 .

	308 *

	309 * For the latter, the tables can simply be truncated.

	310 * For the former, since the tables must be kept parallel, it is probably best

	311 * to simply duplicate an adjacent table cell, parallel in all tables.

	312 *

	313 * It may make sense to restructure the tables, especially by using small search

	314 * tables for the variants instead of indexing them parallel to the table here.

	315 */

	316 #endif

	317

	/* number of entries in each of the parallel escape sequence tables */
	#define MAX_STATES_2022 74
	/*
	 * Keys of all recognized escape sequence prefixes, in strictly ascending
	 * order because getKey_2022() looks them up with a binary search.
	 * A key is built byte by byte as key=(key<<5)+normalize_esq_chars_2022[byte].
	 */
	static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
	/*   0           1           2           3           4           5           6           7           8           9           */

	     1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
	    ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
	    ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
	    ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
	    ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
	    ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
	    ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
	    ,35947631   ,35947635   ,35947636   ,35947638
	};

	331

	#ifdef U_ENABLE_GENERIC_ISO_2022

	/*
	 * Converter names for the generic ISO-2022 converter, parallel to
	 * escSeqStateTable_Key_2022[]; NULL where the table holds no name.
	 */
	static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
	 /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */

	     NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
	    ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
	    ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
	    ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
	    ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"             ,"ibm-949"              ,"ISO-IR-165"
	    ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
	    ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
	    ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
	};

	#endif

	348

	349 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {

	350 /* 0 1 2 3 4 5 6 7 8 9 */

	351 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_ 2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022

	352 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022

	353 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T ERMINAL_2022 ,VALID_TERMINAL_2022

	354 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022

	355 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022

	356 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022

	357 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022

	358 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

	359 };

	360

	361

	/* Type def for refactoring changeState_2022 code: selects the ISO-2022 variant being processed */
	typedef enum{
	#ifdef U_ENABLE_GENERIC_ISO_2022
	    ISO_2022=0,
	#endif
	    ISO_2022_JP=1,
	    ISO_2022_KR=2,
	    ISO_2022_CN=3
	} Variant2022;

	371

	372 /********* ISO 2022 Converter Protos *********/

	373 static void

	374 _ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode);

	375

	376 static void

	377 _ISO2022Close(UConverter *converter);

	378

	379 static void

	380 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);

	381

	382 static const char*

	383 _ISO2022getName(const UConverter* cnv);

	384

	385 static void

	386 _ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorC ode err);

	387

	388 static UConverter *

	389 _ISO_2022_SafeClone(const UConverter cnv, void stackBuffer, int32_t pBufferSi ze, UErrorCode status);

	390

	391 #ifdef U_ENABLE_GENERIC_ISO_2022

	392 static void

	393 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UEr rorCode* err);

	394 #endif

	395

	396 /const UConverterSharedData _ISO2022Data;/

	397 static const UConverterSharedData _ISO2022JPData;

	398 static const UConverterSharedData _ISO2022KRData;

	399 static const UConverterSharedData _ISO2022CNData;

	400

	401 /************* Converter implementations ****************/

	402

	403 /* The purpose of this function is to get around gcc compiler warnings. */

	404 static U_INLINE void

	405 fromUWriteUInt8(UConverter *cnv,

	406 const char *bytes, int32_t length,

	407 uint8_t *target, const char targetLimit,

	408 int32_t **offsets,

	409 int32_t sourceIndex,

	410 UErrorCode *pErrorCode)

	411 {

	412 char targetChars = (char )*target;

	413 ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,

	414 offsets, sourceIndex, pErrorCode);

	415 target = (uint8_t)targetChars;

	416

	417 }

	418

	419 static U_INLINE void

	420 setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConve rterData){

	421 if(myConverterData->version == 1) {

	422 UConverter *cnv = myConverterData->currentConverter;

	423

	424 cnv->toUnicodeStatus=0; /* offset */

	425 cnv->mode=0; /* state */

	426 cnv->toULength=0; /* byteIndex */

	427 }

	428 }

	429

	430 static U_INLINE void

	431 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConv erterData){

	432 /* in ISO-2022-KR the designator sequence appears only once

	433 * in a file so we append it only once

	434 */

	435 if( converter->charErrorBufferLength==0){

	436

	437 converter->charErrorBufferLength = 4;

	438 converter->charErrorBuffer[0] = 0x1b;

	439 converter->charErrorBuffer[1] = 0x24;

	440 converter->charErrorBuffer[2] = 0x29;

	441 converter->charErrorBuffer[3] = 0x43;

	442 }

	443 if(myConverterData->version == 1) {

	444 UConverter *cnv = myConverterData->currentConverter;

	445

	446 cnv->fromUChar32=0;

	447 cnv->fromUnicodeStatus=1; /* prevLength */

	448 }

	449 }

	450

	451 static void

	452 _ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode){

	453

	454 char myLocale[6]={' ',' ',' ',' ',' ',' '};

	455

	456 cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));

	457 if(cnv->extraInfo != NULL) {

	458 UConverterNamePieces stackPieces;

	459 UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) };

	460 UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->ex traInfo;

	461 uint32_t version;

	462

	463 stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;

	464

	465 uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));

	466 myConverterData->currentType = ASCII1;

	467 cnv->fromUnicodeStatus =FALSE;

	468 if(pArgs->locale){

	469 uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));

	470 }

	471 version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;

	472 myConverterData->version = version;

	473 if(myLocale[0]=='j' && (myLocale[1]=='a'\|\| myLocale[1]=='p') &&

	474 (myLocale[2]=='_' \|\| myLocale[2]=='\0'))

	475 {

	476 size_t len=0;

	477 /* open the required converters and cache them */

	478 if(version>MAX_JA_VERSION) {

	479 /* prevent indexing beyond jpCharsetMasks[] */

	480 myConverterData->version = version = 0;

	481 }

	482 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {

	483 myConverterData->myConverterArray[ISO8859_7] =

	484 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode);

	485 }

	486 myConverterData->myConverterArray[JISX208] =

	487 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code);

	488 if(jpCharsetMasks[version]&CSM(JISX212)) {

	489 myConverterData->myConverterArray[JISX212] =

	490 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode);

	491 }

	492 if(jpCharsetMasks[version]&CSM(GB2312)) {

	493 myConverterData->myConverterArray[GB2312] =

	494 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */

	495 }

	496 if(jpCharsetMasks[version]&CSM(KSC5601)) {

	497 myConverterData->myConverterArray[KSC5601] =

	498 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode);

	499 }

	500

	501 /* set the function pointers to appropriate funtions */

	502 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);

	503 uprv_strcpy(myConverterData->locale,"ja");

	504

	505 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= ");

	506 len = uprv_strlen(myConverterData->name);

	507 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ;

	508 myConverterData->name[len+1]='\0';

	509 }

	510 else if(myLocale[0]=='k' && (myLocale[1]=='o'\|\| myLocale[1]=='r') &&

	511 (myLocale[2]=='_' \|\| myLocale[2]=='\0'))

	512 {

	513 const char *cnvName;

	514 if(version==1) {

	515 cnvName="icu-internal-25546";

	516 } else {

	517 cnvName="ibm-949";

	518 myConverterData->version=version=0;

	519 }

	520 if(pArgs->onlyTestIsLoadable) {

	521 ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carri es result */

	522 uprv_free(cnv->extraInfo);

	523 cnv->extraInfo=NULL;

	524 return;

	525 } else {

	526 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);

	527 if (U_FAILURE(*errorCode)) {

	528 _ISO2022Close(cnv);

	529 return;

	530 }

	531

	532 if(version==1) {

	533 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko, version=1");

	534 uprv_memcpy(cnv->subChars, myConverterData->currentConverter ->subChars, 4);

	535 cnv->subCharLen = myConverterData->currentConverter->subChar Len;

	536 }else{

	537 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko, version=0");

	538 }

	539

	540 /* initialize the state variables */

	541 setInitialStateToUnicodeKR(cnv, myConverterData);

	542 setInitialStateFromUnicodeKR(cnv, myConverterData);

	543

	544 /* set the function pointers to appropriate funtions */

	545 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;

	546 uprv_strcpy(myConverterData->locale,"ko");

	547 }

	548 }

	549 else if(((myLocale[0]=='z' && myLocale[1]=='h') \|\| (myLocale[0]=='c'&& m yLocale[1]=='n'))&&

	550 (myLocale[2]=='_' \|\| myLocale[2]=='\0'))

	551 {

	552

	553 /* open the required converters and cache them */

	554 myConverterData->myConverterArray[GB2312_1] =

	555 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorC ode);

	556 if(version==1) {

	557 myConverterData->myConverterArray[ISO_IR_165] =

	558 ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);

	559 }

	560 myConverterData->myConverterArray[CNS_11643] =

	561 ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);

	562

	563

	564 /* set the function pointers to appropriate funtions */

	565 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;

	566 uprv_strcpy(myConverterData->locale,"cn");

	567

	568 if (version==0){

	569 myConverterData->version = 0;

	570 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0");

	571 }else if (version==1){

	572 myConverterData->version = 1;

	573 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1");

	574 }else {

	575 myConverterData->version = 2;

	576 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2");

	577 }

	578 }

	579 else{

	580 #ifdef U_ENABLE_GENERIC_ISO_2022

	581 myConverterData->isFirstBuffer = TRUE;

	582

	583 /* append the UTF-8 escape sequence */

	584 cnv->charErrorBufferLength = 3;

	585 cnv->charErrorBuffer[0] = 0x1b;

	586 cnv->charErrorBuffer[1] = 0x25;

	587 cnv->charErrorBuffer[2] = 0x42;

	588

	589 cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;

	590 /* initialize the state variables */

	591 uprv_strcpy(myConverterData->name,"ISO_2022");

	592 #else

	593 *errorCode = U_UNSUPPORTED_ERROR;

	594 return;

	595 #endif

	596 }

	597

	598 cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;

	599

	600 if(U_FAILURE(*errorCode) \|\| pArgs->onlyTestIsLoadable) {

	601 _ISO2022Close(cnv);

	602 }

	603 } else {

	604 *errorCode = U_MEMORY_ALLOCATION_ERROR;

	605 }

	606 }

	607

	608

	609 static void

	610 _ISO2022Close(UConverter *converter) {

	611 UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraIn fo);

	612 UConverterSharedData **array = myData->myConverterArray;

	613 int32_t i;

	614

	615 if (converter->extraInfo != NULL) {

	616 /close the array of converter pointers and free the memory/

	617 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {

	618 if(array[i]!=NULL) {

	619 ucnv_unloadSharedDataIfReady(array[i]);

	620 }

	621 }

	622

	623 ucnv_close(myData->currentConverter);

	624

	625 if(!converter->isExtraLocal){

	626 uprv_free (converter->extraInfo);

	627 converter->extraInfo = NULL;

	628 }

	629 }

	630 }

	631

	632 static void

	633 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {

	634 UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) (converter- >extraInfo);

	635 if(choice<=UCNV_RESET_TO_UNICODE) {

	636 uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));

	637 myConverterData->key = 0;

	638 myConverterData->isEmptySegment = FALSE;

	639 }

	640 if(choice!=UCNV_RESET_TO_UNICODE) {

	641 uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));

	642 }

	643 #ifdef U_ENABLE_GENERIC_ISO_2022

	644 if(myConverterData->locale[0] == 0){

	645 if(choice<=UCNV_RESET_TO_UNICODE) {

	646 myConverterData->isFirstBuffer = TRUE;

	647 myConverterData->key = 0;

	648 if (converter->mode == UCNV_SO){

	649 ucnv_close (myConverterData->currentConverter);

	650 myConverterData->currentConverter=NULL;

	651 }

	652 converter->mode = UCNV_SI;

	653 }

	654 if(choice!=UCNV_RESET_TO_UNICODE) {

	655 /* re-append UTF-8 escape sequence */

	656 converter->charErrorBufferLength = 3;

	657 converter->charErrorBuffer[0] = 0x1b;

	658 converter->charErrorBuffer[1] = 0x28;

	659 converter->charErrorBuffer[2] = 0x42;

	660 }

	661 }

	662 else

	663 #endif

	664 {

	665 /* reset the state variables */

	666 if(myConverterData->locale[0] == 'k'){

	667 if(choice<=UCNV_RESET_TO_UNICODE) {

	668 setInitialStateToUnicodeKR(converter, myConverterData);

	669 }

	670 if(choice!=UCNV_RESET_TO_UNICODE) {

	671 setInitialStateFromUnicodeKR(converter, myConverterData);

	672 }

	673 }

	674 }

	675 }

	676

	677 static const char*

	678 _ISO2022getName(const UConverter* cnv){

	679 if(cnv->extraInfo){

	680 UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;

	681 return myData->name;

	682 }

	683 return NULL;

	684 }

	685

	686

	687 /************* to unicode *****************/

	688 /****************************************************************************

	689 * Recognized escape sequences are

	690 * <ESC>(B ASCII

	691 * <ESC>.A ISO-8859-1

	692 * <ESC>.F ISO-8859-7

	693 * <ESC>(J JISX-201

	694 * <ESC>(I JISX-201

	695 * <ESC>$B JISX-208

	696 * <ESC>$@ JISX-208

	697 * <ESC>$(D JISX-212

	698 * <ESC>$A GB2312

	699 * <ESC>$(C KSC5601

	700 */

	701 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {

	702 /* 0 1 2 3 4 5 6 7 8 9 */

	703 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	704 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE

	705 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	706 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE

	707 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	708 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	709 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	710 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

	711 };

	712

	713 /************* to unicode *****************/

	714 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {

	715 /* 0 1 2 3 4 5 6 7 8 9 */

	716 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	717 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	718 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	719 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	720 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165

	721 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	722 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE

	723 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

	724 };

	725

	726

	727 static UCNV_TableStates_2022

	728 getKey_2022(char c,int32_t* key,int32_t* offset){

	729 int32_t togo;

	730 int32_t low = 0;

	731 int32_t hi = MAX_STATES_2022;

	732 int32_t oldmid=0;

	733

	734 togo = normalize_esq_chars_2022[(uint8_t)c];

	735 if(togo == 0) {

	736 /* not a valid character anywhere in an escape sequence */

	737 *key = 0;

	738 *offset = 0;

	739 return INVALID_2022;

	740 }

	741 togo = (*key << 5) + togo;

	742

	743 while (hi != low) /binary search/{

	744

	745 register int32_t mid = (hi+low) >> 1; /Finds median/

	746

	747 if (mid == oldmid)

	748 break;

	749

	750 if (escSeqStateTable_Key_2022[mid] > togo){

	751 hi = mid;

	752 }

	753 else if (escSeqStateTable_Key_2022[mid] < togo){

	754 low = mid;

	755 }

	756 else /we found it/{

	757 *key = togo;

	758 *offset = mid;

	759 return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];

	760 }

	761 oldmid = mid;

	762

	763 }

	764

	765 *key = 0;

	766 *offset = 0;

	767 return INVALID_2022;

	768 }

	769

	770 /*runs through a state machine to determine the escape sequence - codepage corre spondance

	771 */

	772 static void

	773 changeState_2022(UConverter* _this,

	774 const char** source,

	775 const char* sourceLimit,

	776 Variant2022 var,

	777 UErrorCode* err){

	778 UCNV_TableStates_2022 value;

	779 UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInf o);

	780 uint32_t key = myData2022->key;

	781 int32_t offset = 0;

	782 int8_t initialToULength = _this->toULength;

	783 char c;

	784

	785 value = VALID_NON_TERMINAL_2022;

	786 while (*source < sourceLimit) {

	787 c = (source)++;

	788 _this->toUBytes[_this->toULength++]=(uint8_t)c;

	789 value = getKey_2022(c,(int32_t *) &key, &offset);

	790

	791 switch (value){

	792

	793 case VALID_NON_TERMINAL_2022 :

	794 /* continue with the loop */

	795 break;

	796

	797 case VALID_TERMINAL_2022:

	798 key = 0;

	799 goto DONE;

	800

	801 case INVALID_2022:

	802 goto DONE;

	803

	804 case VALID_MAYBE_TERMINAL_2022:

	805 #ifdef U_ENABLE_GENERIC_ISO_2022

	806 /* ESC ( B is ambiguous only for ISO_2022 itself */

	807 if(var == ISO_2022) {

	808 /* discard toUBytes[] for ESC ( B because this sequence is corre ct and complete */

	809 _this->toULength = 0;

	810

	811 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */

	812

	813 /* continue with the loop */

	814 value = VALID_NON_TERMINAL_2022;

	815 break;

	816 } else

	817 #endif

	818 {

	819 /* not ISO_2022 itself, finish here */

	820 value = VALID_TERMINAL_2022;

	821 key = 0;

	822 goto DONE;

	823 }

	824 }

	825 }

	826

	827 DONE:

	828 myData2022->key = key;

	829

	830 if (value == VALID_NON_TERMINAL_2022) {

	831 /* indicate that the escape sequence is incomplete: key!=0 */

	832 return;

	833 } else if (value == INVALID_2022 ) {

	834 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	835 } else /* value == VALID_TERMINAL_2022 */ {

	836 switch(var){

	837 #ifdef U_ENABLE_GENERIC_ISO_2022

	838 case ISO_2022:

	839 {

	840 const char *chosenConverterName = escSeqStateTable_Result_2022[offse t];

	841 if(chosenConverterName == NULL) {

	842 /* SS2 or SS3 */

	843 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	844 _this->toUCallbackReason = UCNV_UNASSIGNED;

	845 return;

	846 }

	847

	848 _this->mode = UCNV_SI;

	849 ucnv_close(myData2022->currentConverter);

	850 myData2022->currentConverter = myUConverter = ucnv_open(chosenConver terName, err);

	851 if(U_SUCCESS(*err)) {

	852 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;

	853 _this->mode = UCNV_SO;

	854 }

	855 break;

	856 }

	857 #endif

	858 case ISO_2022_JP:

	859 {

	860 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];

	861 switch(tempState) {

	862 case INVALID_STATE:

	863 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	864 break;

	865 case SS2_STATE:

	866 if(myData2022->toU2022State.cs[2]!=0) {

	867 if(myData2022->toU2022State.g<2) {

	868 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;

	869 }

	870 myData2022->toU2022State.g=2;

	871 } else {

	872 /* illegal to have SS2 before a matching designator */

	873 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	874 }

	875 break;

	876 /* case SS3_STATE: not used in ISO-2022-JP-x */

	877 case ISO8859_1:

	878 case ISO8859_7:

	879 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

	880 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	881 } else {

	882 /* G2 charset for SS2 */

	883 myData2022->toU2022State.cs[2]=(int8_t)tempState;

	884 }

	885 break;

	886 default:

	887 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

	888 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	889 } else {

	890 /* G0 charset */

	891 myData2022->toU2022State.cs[0]=(int8_t)tempState;

	892 }

	893 break;

	894 }

	895 }

	896 break;

	897 case ISO_2022_CN:

	898 {

	899 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];

	900 switch(tempState) {

	901 case INVALID_STATE:

	902 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	903 break;

	904 case SS2_STATE:

	905 if(myData2022->toU2022State.cs[2]!=0) {

	906 if(myData2022->toU2022State.g<2) {

	907 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;

	908 }

	909 myData2022->toU2022State.g=2;

	910 } else {

	911 /* illegal to have SS2 before a matching designator */

	912 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	913 }

	914 break;

	915 case SS3_STATE:

	916 if(myData2022->toU2022State.cs[3]!=0) {

	917 if(myData2022->toU2022State.g<2) {

	918 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;

	919 }

	920 myData2022->toU2022State.g=3;

	921 } else {

	922 /* illegal to have SS3 before a matching designator */

	923 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	924 }

	925 break;

	926 case ISO_IR_165:

	927 if(myData2022->version==0) {

	928 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	929 break;

	930 }

	931 /fall through/

	932 case GB2312_1:

	933 /fall through/

	934 case CNS_11643_1:

	935 myData2022->toU2022State.cs[1]=(int8_t)tempState;

	936 break;

	937 case CNS_11643_2:

	938 myData2022->toU2022State.cs[2]=(int8_t)tempState;

	939 break;

	940 default:

	941 /* other CNS 11643 planes */

	942 if(myData2022->version==0) {

	943 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	944 } else {

	945 myData2022->toU2022State.cs[3]=(int8_t)tempState;

	946 }

	947 break;

	948 }

	949 }

	950 break;

	951 case ISO_2022_KR:

	952 if(offset==0x30){

	953 /* nothing to be done, just accept this one escape sequence */

	954 } else {

	955 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

	956 }

	957 break;

	958

	959 default:

	960 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	961 break;

	962 }

	963 }

	964 if(U_SUCCESS(*err)) {

	965 _this->toULength = 0;

	966 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {

	967 if(_this->toULength>1) {

	968 /*

	969 * Ticket 5691: consistent illegal sequences:

	970 * - We include at least the first byte (ESC) in the illegal sequenc e.

	971 * - If any of the non-initial bytes could be the start of a charact er,

	972 * we stop the illegal sequence before the first one of those.

	973 * In escape sequences, all following bytes are "printable", that is,

	974 * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),

	975 * they are valid single/lead bytes.

	976 * For simplicity, we always only report the initial ESC byte as t he

	977 * illegal sequence and back out all other bytes we looked at.

	978 */

	979 /* Back out some bytes. */

	980 int8_t backOutDistance=_this->toULength-1;

	981 int8_t bytesFromThisBuffer=_this->toULength-initialToULength;

	982 if(backOutDistance<=bytesFromThisBuffer) {

	983 /* same as initialToULength<=1 */

	984 *source-=backOutDistance;

	985 } else {

	986 /* Back out bytes from the previous buffer: Need to replay them. */

	987 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance );

	988 /* same as -(initialToULength-1) */

	989 /* preToULength is negative! */

	990 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng th);

	991 *source-=bytesFromThisBuffer;

	992 }

	993 _this->toULength=1;

	994 }

	995 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {

	996 _this->toUCallbackReason = UCNV_UNASSIGNED;

	997 }

	998 }

	999

	1000 /*Checks the characters of the buffer against valid 2022 escape sequences

	1001 *if the match we return a pointer to the initial start of the sequence otherwise

	1002 *we return sourceLimit

	1003 */

	1004 /*for 2022 looks ahead in the stream

	1005 *to determine the longest possible convertible

	1006 *data stream

	1007 */

	1008 static U_INLINE const char*

	1009 getEndOfBuffer_2022(const char** source,

	1010 const char* sourceLimit,

	1011 UBool flush){

	1012

	1013 const char* mySource = *source;

	1014

	1015 #ifdef U_ENABLE_GENERIC_ISO_2022

	1016 if (*source >= sourceLimit)

	1017 return sourceLimit;

	1018

	1019 do{

	1020

	1021 if (*mySource == ESC_2022){

	1022 int8_t i;

	1023 int32_t key = 0;

	1024 int32_t offset;

	1025 UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;

	1026

	1027 /* Kludge: I could not

	1028 * figure out the reason for validating an escape sequence

	1029 * twice - once here and once in changeState_2022().

	1030 * is it possible to have an ESC character in a ISO2022

	1031 * byte stream which is valid in a code page? Is it legal?

	1032 */

	1033 for (i=0;

	1034 (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);

	1035 i++) {

	1036 value = getKey_2022(*(mySource+i), &key, &offset);

	1037 }

	1038 if (value > 0 \|\| *mySource==ESC_2022)

	1039 return mySource;

	1040

	1041 if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )

	1042 return sourceLimit;

	1043 }

	1044 }while (++mySource < sourceLimit);

	1045

	1046 return sourceLimit;

	1047 #else

	1048 while(mySource < sourceLimit && *mySource != ESC_2022) {

	1049 ++mySource;

	1050 }

	1051 return mySource;

	1052 #endif

	1053 }

	1054

	1055

	1056 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmb cs.c

	1057 * any future change in _MBCSFromUChar32() function should be reflected here.

	1058 * @return number of bytes in *value; negative number if fallback; 0 if no mappi ng

	1059 */

	1060 static U_INLINE int32_t

	1061 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,

	1062 UChar32 c,

	1063 uint32_t* value,

	1064 UBool useFallback,

	1065 int outputType)

	1066 {

	1067 const int32_t *cx;

	1068 const uint16_t *table;

	1069 uint32_t stage2Entry;

	1070 uint32_t myValue;

	1071 int32_t length;

	1072 const uint8_t *p;

	1073 /*

	1074 * TODO(markus): Use and require new, faster MBCS conversion table structure s.

	1075 * Use internal version of ucnv_open() that verifies that the new structures are available,

	1076 * else U_INTERNAL_PROGRAM_ERROR.

	1077 */

	1078 /* BMP-only codepages are stored without stage 1 entries for supplementary c ode points */

	1079 if(c<0x10000 \|\| (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {

	1080 table=sharedData->mbcs.fromUnicodeTable;

	1081 stage2Entry=MBCS_STAGE_2_FROM_U(table, c);

	1082 /* get the bytes and the length for the output */

	1083 if(outputType==MBCS_OUTPUT_2){

	1084 myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);

	1085 if(myValue<=0xff) {

	1086 length=1;

	1087 } else {

	1088 length=2;

	1089 }

	1090 } else /* outputType==MBCS_OUTPUT_3 */ {

	1091 p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, sta ge2Entry, c);

	1092 myValue=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];

	1093 if(myValue<=0xff) {

	1094 length=1;

	1095 } else if(myValue<=0xffff) {

	1096 length=2;

	1097 } else {

	1098 length=3;

	1099 }

	1100 }

	1101 /* is this code point assigned, or do we use fallbacks? */

	1102 if((stage2Entry&(1<<(16+(c&0xf))))!=0) {

	1103 /* assigned */

	1104 *value=myValue;

	1105 return length;

	1106 } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {

	1107 /*

	1108 * We allow a 0 byte output if the "assigned" bit is set for this en try.

	1109 * There is no way with this data structure for fallback output

	1110 * to be a zero byte.

	1111 */

	1112 *value=myValue;

	1113 return -length;

	1114 }

	1115 }

	1116

	1117 cx=sharedData->mbcs.extIndexes;

	1118 if(cx!=NULL) {

	1119 return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);

	1120 }

	1121

	1122 /* unassigned */

	1123 return 0;

	1124 }

	1125

	1126 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c

	1127 * any future change in _MBCSSingleFromUChar32() function should be reflected he re.

	1128 * @param retval pointer to output byte

	1129 * @return 1 roundtrip byte 0 no mapping -1 fallback byte

	1130 */

	1131 static U_INLINE int32_t

	1132 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,

	1133 UChar32 c,

	1134 uint32_t* retval,

	1135 UBool useFallback)

	1136 {

	1137 const uint16_t *table;

	1138 int32_t value;

	1139 /* BMP-only codepages are stored without stage 1 entries for supplementary c ode points */

	1140 if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {

	1141 return 0;

	1142 }

	1143 /* convert the Unicode code point in c into codepage bytes (same as in _MBCS FromUnicodeWithOffsets) */

	1144 table=sharedData->mbcs.fromUnicodeTable;

	1145 /* get the byte for the output */

	1146 value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnic odeBytes, c);

	1147 /* is this code point assigned, or do we use fallbacks? */

	1148 *retval=(uint32_t)(value&0xff);

	1149 if(value>=0xf00) {

	1150 return 1; /* roundtrip */

	1151 } else if(useFallback ? value>=0x800 : value>=0xc00) {

	1152 return -1; /* fallback taken */

	1153 } else {

	1154 return 0; /* no mapping */

	1155 }

	1156 }

	1157

	1158 /*

	1159 * Check that the result is a 2-byte value with each byte in the range A1..FE

	1160 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte

	1161 * to move it to the ISO 2022 range 21..7E.

	1162 * Return 0 if out of range.

	1163 */

	1164 static U_INLINE uint32_t

	1165 _2022FromGR94DBCS(uint32_t value) {

	1166 if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&

	1167 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)

	1168 ) {

	1169 return value - 0x8080; /* shift down to 21..7e byte range */

	1170 } else {

	1171 return 0; /* not valid for ISO 2022 */

	1172 }

	1173 }

	1174

	1175 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */

	1176 /*

	1177 * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code poin t, it returns the

	1178 * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point

	1179 * unchanged.

	1180 */

	1181 static U_INLINE uint32_t

	1182 _2022ToGR94DBCS(uint32_t value) {

	1183 uint32_t returnValue = value + 0x8080;

	1184 if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&

	1185 (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {

	1186 return returnValue;

	1187 } else {

	1188 return value;

	1189 }

	1190 }

	1191 #endif

	1192

	1193 #ifdef U_ENABLE_GENERIC_ISO_2022

	1194

	1195 /***************************************************************************** *

	1196 * ISO-2022 Converter

	1197 *

	1198 *

	1199 */

	1200

	1201 static void

	1202 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,

	1203 UErrorCode* err){

	1204 const char* mySourceLimit, *realSourceLimit;

	1205 const char* sourceStart;

	1206 const UChar* myTargetStart;

	1207 UConverter* saveThis;

	1208 UConverterDataISO2022* myData;

	1209 int8_t length;

	1210

	1211 saveThis = args->converter;

	1212 myData=((UConverterDataISO2022*)(saveThis->extraInfo));

	1213

	1214 realSourceLimit = args->sourceLimit;

	1215 while (args->source < realSourceLimit) {

	1216 if(myData->key == 0) { /* are we in the middle of an escape sequence? */

	1217 /Find the end of the buffer e.g : Next Escape Seq \| end of Buffer/

	1218 mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit , args->flush);

	1219

	1220 if(args->source < mySourceLimit) {

	1221 if(myData->currentConverter==NULL) {

	1222 myData->currentConverter = ucnv_open("ASCII",err);

	1223 if(U_FAILURE(*err)){

	1224 return;

	1225 }

	1226

	1227 myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U _CALLBACK_STOP;

	1228 saveThis->mode = UCNV_SO;

	1229 }

	1230

	1231 /* convert to before the ESC or until the end of the buffer */

	1232 myData->isFirstBuffer=FALSE;

	1233 sourceStart = args->source;

	1234 myTargetStart = args->target;

	1235 args->converter = myData->currentConverter;

	1236 ucnv_toUnicode(args->converter,

	1237 &args->target,

	1238 args->targetLimit,

	1239 &args->source,

	1240 mySourceLimit,

	1241 args->offsets,

	1242 (UBool)(args->flush && mySourceLimit == realSourceLimit),

	1243 err);

	1244 args->converter = saveThis;

	1245

	1246 if (*err == U_BUFFER_OVERFLOW_ERROR) {

	1247 /* move the overflow buffer */

	1248 length = saveThis->UCharErrorBufferLength = myData->currentC onverter->UCharErrorBufferLength;

	1249 myData->currentConverter->UCharErrorBufferLength = 0;

	1250 if(length > 0) {

	1251 uprv_memcpy(saveThis->UCharErrorBuffer,

	1252 myData->currentConverter->UCharErrorBuffer,

	1253 length*U_SIZEOF_UCHAR);

	1254 }

	1255 return;

	1256 }

	1257

	1258 /*

	1259 * At least one of:

	1260 * -Error while converting

	1261 * -Done with entire buffer

	1262 * -Need to write offsets or update the current offset

	1263 * (leave that up to the code in ucnv.c)

	1264 *

	1265 * or else we just stopped at an ESC byte and continue with chan geState_2022()

	1266 */

	1267 if (U_FAILURE(*err) \|\|

	1268 (args->source == realSourceLimit) \|\|

	1269 (args->offsets != NULL && (args->target != myTargetStart \|\| args->source != sourceStart) \|\|

	1270 (mySourceLimit < realSourceLimit && myData->currentConverter ->toULength > 0))

	1271 ) {

	1272 /* copy partial or error input for truncated detection and e rror handling */

	1273 if(U_FAILURE(*err)) {

	1274 length = saveThis->invalidCharLength = myData->currentCo nverter->invalidCharLength;

	1275 if(length > 0) {

	1276 uprv_memcpy(saveThis->invalidCharBuffer, myData->cur rentConverter->invalidCharBuffer, length);

	1277 }

	1278 } else {

	1279 length = saveThis->toULength = myData->currentConverter- >toULength;

	1280 if(length > 0) {

	1281 uprv_memcpy(saveThis->toUBytes, myData->currentConve rter->toUBytes, length);

	1282 if(args->source < mySourceLimit) {

	1283 err = U_TRUNCATED_CHAR_FOUND; / truncated inpu t before ESC */

	1284 }

	1285 }

	1286 }

	1287 return;

	1288 }

	1289 }

	1290 }

	1291

	1292 sourceStart = args->source;

	1293 changeState_2022(args->converter,

	1294 &(args->source),

	1295 realSourceLimit,

	1296 ISO_2022,

	1297 err);

	1298 if (U_FAILURE(*err) \|\| (args->source != sourceStart && args->offsets != NULL)) {

	1299 /* let the ucnv.c code update its current offset */

	1300 return;

	1301 }

	1302 }

	1303 }

	1304

	1305 #endif

	1306

	1307 /*

	1308 * To Unicode Callback helper function

	1309 */

	1310 static void

	1311 toUnicodeCallback(UConverter *cnv,

	1312 const uint32_t sourceChar, const uint32_t targetUniChar,

	1313 UErrorCode* err){

	1314 if(sourceChar>0xff){

	1315 cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);

	1316 cnv->toUBytes[1] = (uint8_t)sourceChar;

	1317 cnv->toULength = 2;

	1318 }

	1319 else{

	1320 cnv->toUBytes[0] =(char) sourceChar;

	1321 cnv->toULength = 1;

	1322 }

	1323

	1324 if(targetUniChar == (missingCharMarker-1/0xfffe/)){

	1325 *err = U_INVALID_CHAR_FOUND;

	1326 }

	1327 else{

	1328 *err = U_ILLEGAL_CHAR_FOUND;

	1329 }

	1330 }

	1331

	1332 /************************************ISO-2022-JP************************** *****************/

	1333

	1334 /************************************ IMPORTANT ************************** ******************

	1335 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode( ) functions for SBCS,DBCS and

	1336 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().

	1337 * The converter iterates over each Unicode codepoint

	1338 * to obtain the equivalent codepoints from the codepages supported. Since the so urce buffer is

	1339 * processed one char at a time it would make sense to reduce the extra processin g a canned converter

	1340 * would do as far as possible.

	1341 *

	1342 * If the implementation of these macros or structure of sharedData struct change in the future, make

	1343 * sure that ISO-2022 is also changed.

	1344 ****************************************************************************** *****************

	1345 */

	1346

	1347 /***************************************************************************** ******************

	1348 * Rules for ISO-2022-jp encoding

	1349 * (i) Escape sequences must be fully contained within a line they should not

	1350 * span new lines or CRs

	1351 * (ii) If the last character on a line is represented by two bytes then an ASCI I or

	1352 * JIS-Roman character escape sequence should follow before the line termin ates

	1353 * (iii) If the first character on the line is represented by two bytes then a tw o

	1354 * byte character escape sequence should precede it

	1355 * (iv) If no escape sequence is encountered then the characters are ASCII

	1356 * (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,

	1357 * and invoked with SS2 (ESC N).

	1358 * (vi) If there is any G0 designation in text, there must be a switch to

	1359 * ASCII or to JIS X 0201-Roman before a space character (but not

	1360 * necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control

	1361 * characters such as tab or CRLF.

	1362 * (vii) Supported encodings:

	1363 * ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1, ISO-8859-7

	1364 *

	1365 * source : RFC-1554

	1366 *

	1367 * JISX201, JISX208,JISX212 : new .cnv data files created

	1368 * KSC5601 : alias to ibm-949 mapping table

	1369 * GB2312 : alias to ibm-1386 mapping table

	1370 * ISO-8859-1 : Algorithmic implemented as LATIN1 case

	1371 * ISO-8859-7 : alias to ibm-9409 mapping table

	1372 */

	1373

	1374 /* preference order of JP charsets */

	1375 static const StateEnum jpCharsetPref[]={

	1376 ASCII,

	1377 JISX201,

	1378 ISO8859_1,

	1379 ISO8859_7,

	1380 JISX208,

	1381 JISX212,

	1382 GB2312,

	1383 KSC5601,

	1384 HWKANA_7BIT

	1385 };

	1386

	1387 /*

	1388 * The escape sequences must be in order of the enum constants like JISX201 = 3 ,

	1389 * not in order of jpCharsetPref[]!

	1390 */

	1391 static const char escSeqChars[][6] ={

	1392 "\x1B\x28\x42", /* <ESC>(B ASCII */

	1393 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */

	1394 "\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */

	1395 "\x1B\x28\x4A", /* <ESC>(J JISX-201 */

	1396 "\x1B\x24\x42", /* <ESC>$B JISX-208 */

	1397 "\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */

	1398 "\x1B\x24\x41", /* <ESC>$A GB2312 */

	1399 "\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */

	1400 "\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */

	1401

	1402 };

	1403 static const int8_t escSeqCharsLen[] ={

	1404 3, /* length of <ESC>(B ASCII */

	1405 3, /* length of <ESC>.A ISO-8859-1 */

	1406 3, /* length of <ESC>.F ISO-8859-7 */

	1407 3, /* length of <ESC>(J JISX-201 */

	1408 3, /* length of <ESC>$B JISX-208 */

	1409 4, /* length of <ESC>$(D JISX-212 */

	1410 3, /* length of <ESC>$A GB2312 */

	1411 4, /* length of <ESC>$(C KSC5601 */

	1412 3 /* length of <ESC>(I HWKANA_7BIT */

	1413 };

	1414

	1415 /*

	1416 * The iteration over various code pages works this way:

	1417 * i) Get the currentState from myConverterData->currentState

	1418 * ii) Check if the character is mapped to a valid character in the currentState

	1419 * Yes -> a) set the initIterState to currentState

	1420 * b) remain in this state until an invalid character is found

	1421 * No -> a) go to the next code page and find the character

	1422 * iii) Before changing the state, increment the current state and check if the current state

	1423 * is equal to the initIterState

	1424 * Yes -> A character that cannot be represented in any of the supported en codings

	1425 * break and return a U_INVALID_CHARACTER error

	1426 * No -> Continue and find the character in next code page

	1427 *

	1428 *

	1429 * TODO: Implement a priority technique where the users are allowed to set the pr iority of code pages

	1430 */

	1431

	1432 /* Map 00..7F to Unicode according to JIS X 0201. */

	1433 static U_INLINE uint32_t

	1434 jisx201ToU(uint32_t value) {

	1435 if(value < 0x5c) {

	1436 return value;

	1437 } else if(value == 0x5c) {

	1438 return 0xa5;

	1439 } else if(value == 0x7e) {

	1440 return 0x203e;

	1441 } else /* value <= 0x7f */ {

	1442 return value;

	1443 }

	1444 }

	1445

	1446 /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. * /

	1447 static U_INLINE uint32_t

	1448 jisx201FromU(uint32_t value) {

	1449 if(value<=0x7f) {

	1450 if(value!=0x5c && value!=0x7e) {

	1451 return value;

	1452 }

	1453 } else if(value==0xa5) {

	1454 return 0x5c;

	1455 } else if(value==0x203e) {

	1456 return 0x7e;

	1457 }

	1458 return 0xfffe;

	1459 }

	1460

	1461 /*

	1462 * Take a valid Shift-JIS byte pair, check that it is in the range corresponding

	1463 * to JIS X 0208, and convert it to a pair of 21..7E bytes.

	1464 * Return 0 if the byte pair is out of range.

	1465 */

	1466 static U_INLINE uint32_t

	1467 _2022FromSJIS(uint32_t value) {

	1468 uint8_t trail;

	1469

	1470 if(value > 0xEFFC) {

	1471 return 0; /* beyond JIS X 0208 */

	1472 }

	1473

	1474 trail = (uint8_t)value;

	1475

	1476 value &= 0xff00; /* lead byte */

	1477 if(value <= 0x9f00) {

	1478 value -= 0x7000;

	1479 } else /* 0xe000 <= value <= 0xef00 */ {

	1480 value -= 0xb000;

	1481 }

	1482 value <<= 1;

	1483

	1484 if(trail <= 0x9e) {

	1485 value -= 0x100;

	1486 if(trail <= 0x7e) {

	1487 value \|= trail - 0x1f;

	1488 } else {

	1489 value \|= trail - 0x20;

	1490 }

	1491 } else /* trail <= 0xfc */ {

	1492 value \|= trail - 0x7e;

	1493 }

	1494 return value;

	1495 }

	1496

	1497 /*

	1498 * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.

	1499 * If either byte is outside 21..7E make sure that the result is not valid

	1500 * for Shift-JIS so that the converter catches it.

	1501 * Some invalid byte values already turn into equally invalid Shift-JIS

	1502 * byte values and need not be tested explicitly.

	1503 */

	1504 static U_INLINE void

	1505 _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {

	1506 if(c1&1) {

	1507 ++c1;

	1508 if(c2 <= 0x5f) {

	1509 c2 += 0x1f;

	1510 } else if(c2 <= 0x7e) {

	1511 c2 += 0x20;

	1512 } else {

	1513 c2 = 0; /* invalid */

	1514 }

	1515 } else {

	1516 if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {

	1517 c2 += 0x7e;

	1518 } else {

	1519 c2 = 0; /* invalid */

	1520 }

	1521 }

	1522 c1 >>= 1;

	1523 if(c1 <= 0x2f) {

	1524 c1 += 0x70;

	1525 } else if(c1 <= 0x3f) {

	1526 c1 += 0xb0;

	1527 } else {

	1528 c1 = 0; /* invalid */

	1529 }

	1530 bytes[0] = (char)c1;

	1531 bytes[1] = (char)c2;

	1532 }

	1533

	1534 /*

	1535 * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS )

	1536 * Katakana.

	1537 * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fa llbacks

	1538 * because Shift-JIS roundtrips half-width Katakana to single bytes.

	1539 * These were the only fallbacks in ICU's jisx-208.ucm file.

	1540 */

	1541 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {

	1542 0x2123, /* U+FF61 */

	1543 0x2156,

	1544 0x2157,

	1545 0x2122,

	1546 0x2126,

	1547 0x2572,

	1548 0x2521,

	1549 0x2523,

	1550 0x2525,

	1551 0x2527,

	1552 0x2529,

	1553 0x2563,

	1554 0x2565,

	1555 0x2567,

	1556 0x2543,

	1557 0x213C, /* U+FF70 */

	1558 0x2522,

	1559 0x2524,

	1560 0x2526,

	1561 0x2528,

	1562 0x252A,

	1563 0x252B,

	1564 0x252D,

	1565 0x252F,

	1566 0x2531,

	1567 0x2533,

	1568 0x2535,

	1569 0x2537,

	1570 0x2539,

	1571 0x253B,

	1572 0x253D,

	1573 0x253F, /* U+FF80 */

	1574 0x2541,

	1575 0x2544,

	1576 0x2546,

	1577 0x2548,

	1578 0x254A,

	1579 0x254B,

	1580 0x254C,

	1581 0x254D,

	1582 0x254E,

	1583 0x254F,

	1584 0x2552,

	1585 0x2555,

	1586 0x2558,

	1587 0x255B,

	1588 0x255E,

	1589 0x255F, /* U+FF90 */

	1590 0x2560,

	1591 0x2561,

	1592 0x2562,

	1593 0x2564,

	1594 0x2566,

	1595 0x2568,

	1596 0x2569,

	1597 0x256A,

	1598 0x256B,

	1599 0x256C,

	1600 0x256D,

	1601 0x256F,

	1602 0x2573,

	1603 0x212B,

	1604 0x212C /* U+FF9F */

	1605 };

	1606

	1607 static void

	1608 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args , UErrorCode* err) {

	1609 UConverter *cnv = args->converter;

	1610 UConverterDataISO2022 *converterData;

	1611 ISO2022State *pFromU2022State;

	1612 uint8_t target = (uint8_t ) args->target;

	1613 const uint8_t targetLimit = (const uint8_t ) args->targetLimit;

	1614 const UChar* source = args->source;

	1615 const UChar* sourceLimit = args->sourceLimit;

	1616 int32_t* offsets = args->offsets;

	1617 UChar32 sourceChar;

	1618 char buffer[8];

	1619 int32_t len, outLen;

	1620 int8_t choices[10];

	1621 int32_t choiceCount;

	1622 uint32_t targetValue = 0;

	1623 UBool useFallback;

	1624

	1625 int32_t i;

	1626 int8_t cs, g;

	1627

	1628 /* set up the state */

	1629 converterData = (UConverterDataISO2022*)cnv->extraInfo;

	1630 pFromU2022State = &converterData->fromU2022State;

	1631

	1632 choiceCount = 0;

	1633

	1634 /* check if the last codepoint of previous buffer was a lead surrogate*/

	1635 if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {

	1636 goto getTrail;

	1637 }

	1638

	1639 while(source < sourceLimit) {

	1640 if(target < targetLimit) {

	1641

	1642 sourceChar = *(source++);

	1643 /check if the char is a First surrogate/

	1644 if(UTF_IS_SURROGATE(sourceChar)) {

	1645 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

	1646 getTrail:

	1647 /look ahead to find the trail surrogate/

	1648 if(source < sourceLimit) {

	1649 /* test the following code unit */

	1650 UChar trail=(UChar) *source;

	1651 if(UTF_IS_SECOND_SURROGATE(trail)) {

	1652 source++;

	1653 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

	1654 cnv->fromUChar32=0x00;

	1655 /* convert this supplementary code point */

	1656 /* exit this condition tree */

	1657 } else {

	1658 /* this is an unmatched lead code unit (1st surrogat e) */

	1659 /* callback(illegal) */

	1660 *err=U_ILLEGAL_CHAR_FOUND;

	1661 cnv->fromUChar32=sourceChar;

	1662 break;

	1663 }

	1664 } else {

	1665 /* no more input */

	1666 cnv->fromUChar32=sourceChar;

	1667 break;

	1668 }

	1669 } else {

	1670 /* this is an unmatched trail code unit (2nd surrogate) */

	1671 /* callback(illegal) */

	1672 *err=U_ILLEGAL_CHAR_FOUND;

	1673 cnv->fromUChar32=sourceChar;

	1674 break;

	1675 }

	1676 }

	1677

	1678 /* do not convert SO/SI/ESC */

	1679 if(IS_2022_CONTROL(sourceChar)) {

	1680 /* callback(illegal) */

	1681 *err=U_ILLEGAL_CHAR_FOUND;

	1682 cnv->fromUChar32=sourceChar;

	1683 break;

	1684 }

	1685

	1686 /* do the conversion */

	1687

	1688 if(choiceCount == 0) {

	1689 uint16_t csm;

	1690

	1691 /*

	1692 * The csm variable keeps track of which charsets are allowed

	1693 * and not used yet while building the choices[].

	1694 */

	1695 csm = jpCharsetMasks[converterData->version];

	1696 choiceCount = 0;

	1697

	1698 /* JIS7/8: try single-byte half-width Katakana before JISX208 */

	1699 if(converterData->version == 3 \|\| converterData->version == 4) {

	1700 choices[choiceCount++] = (int8_t)HWKANA_7BIT;

	1701 }

	1702 /* Do not try single-byte half-width Katakana for other versions . */

	1703 csm &= ~CSM(HWKANA_7BIT);

	1704

	1705 /* try the current G0 charset */

	1706 choices[choiceCount++] = cs = pFromU2022State->cs[0];

	1707 csm &= ~CSM(cs);

	1708

	1709 /* try the current G2 charset */

	1710 if((cs = pFromU2022State->cs[2]) != 0) {

	1711 choices[choiceCount++] = cs;

	1712 csm &= ~CSM(cs);

	1713 }

	1714

	1715 /* try all the other possible charsets */

	1716 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {

	1717 cs = (int8_t)jpCharsetPref[i];

	1718 if(CSM(cs) & csm) {

	1719 choices[choiceCount++] = cs;

	1720 csm &= ~CSM(cs);

	1721 }

	1722 }

	1723 }

	1724

	1725 cs = g = 0;

	1726 /*

	1727 * len==0: no mapping found yet

	1728 * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks

	1729 * len>0: found a roundtrip result, done

	1730 */

	1731 len = 0;

	1732 /*

	1733 * We will turn off useFallback after finding a fallback,

	1734 * but we still get fallbacks from PUA code points as usual.

	1735 * Therefore, we will also need to check that we don't overwrite

	1736 * an early fallback with a later one.

	1737 */

	1738 useFallback = cnv->useFallback;

	1739

	1740 for(i = 0; i < choiceCount && len <= 0; ++i) {

	1741 uint32_t value;

	1742 int32_t len2;

	1743 int8_t cs0 = choices[i];

	1744 switch(cs0) {

	1745 case ASCII:

	1746 if(sourceChar <= 0x7f) {

	1747 targetValue = (uint32_t)sourceChar;

	1748 len = 1;

	1749 cs = cs0;

	1750 g = 0;

	1751 }

	1752 break;

	1753 case ISO8859_1:

	1754 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {

	1755 targetValue = (uint32_t)sourceChar - 0x80;

	1756 len = 1;

	1757 cs = cs0;

	1758 g = 2;

	1759 }

	1760 break;

	1761 case HWKANA_7BIT:

	1762 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) {

	1763 if(converterData->version==3) {

	1764 /* JIS7: use G1 (SO) */

	1765 /* Shift U+FF61..U+FF9F to bytes 21..5F. */

	1766 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));

	1767 len = 1;

	1768 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */

	1769 g = 1;

	1770 } else if(converterData->version==4) {

	1771 /* JIS8: use 8-bit bytes with any single-byte charse t, see escape sequence output below */

	1772 /* Shift U+FF61..U+FF9F to bytes A1..DF. */

	1773 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));

	1774 len = 1;

	1775

	1776 cs = pFromU2022State->cs[0];

	1777 if(IS_JP_DBCS(cs)) {

	1778 /* switch from a DBCS charset to JISX201 */

	1779 cs = (int8_t)JISX201;

	1780 }

	1781 /* else stay in the current G0 charset */

	1782 g = 0;

	1783 }

	1784 /* else do not use HWKANA_7BIT with other versions */

	1785 }

	1786 break;

	1787 case JISX201:

	1788 /* G0 SBCS */

	1789 value = jisx201FromU(sourceChar);

	1790 if(value <= 0x7f) {

	1791 targetValue = value;

	1792 len = 1;

	1793 cs = cs0;

	1794 g = 0;

	1795 useFallback = FALSE;

	1796 }

	1797 break;

	1798 case JISX208:

	1799 /* G0 DBCS from Shift-JIS table */

	1800 len2 = MBCS_FROM_UCHAR32_ISO2022(

	1801 converterData->myConverterArray[cs0],

	1802 sourceChar, &value,

	1803 useFallback, MBCS_OUTPUT_2);

	1804 if(len2 == 2 \|\| (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

	1805 value = _2022FromSJIS(value);

	1806 if(value != 0) {

	1807 targetValue = value;

	1808 len = len2;

	1809 cs = cs0;

	1810 g = 0;

	1811 useFallback = FALSE;

	1812 }

	1813 } else if(len == 0 && useFallback &&

	1814 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) {

	1815 targetValue = hwkana_fb[sourceChar - HWKANA_START];

	1816 len = -2;

	1817 cs = cs0;

	1818 g = 0;

	1819 useFallback = FALSE;

	1820 }

	1821 break;

	1822 case ISO8859_7:

	1823 /* G0 SBCS forced to 7-bit output */

	1824 len2 = MBCS_SINGLE_FROM_UCHAR32(

	1825 converterData->myConverterArray[cs0],

	1826 sourceChar, &value,

	1827 useFallback);

	1828 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) {

	1829 targetValue = value - 0x80;

	1830 len = len2;

	1831 cs = cs0;

	1832 g = 2;

	1833 useFallback = FALSE;

	1834 }

	1835 break;

	1836 default:

	1837 /* G0 DBCS */

	1838 len2 = MBCS_FROM_UCHAR32_ISO2022(

	1839 converterData->myConverterArray[cs0],

	1840 sourceChar, &value,

	1841 useFallback, MBCS_OUTPUT_2);

	1842 if(len2 == 2 \|\| (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

	1843 if(cs0 == KSC5601) {

	1844 /*

	1845 * Check for valid bytes for the encoding scheme.

	1846 * This is necessary because the sub-converter (wind ows-949)

	1847 * has a broader encoding scheme than is valid for 2 022.

	1848 */

	1849 value = _2022FromGR94DBCS(value);

	1850 if(value == 0) {

	1851 break;

	1852 }

	1853 }

	1854 targetValue = value;

	1855 len = len2;

	1856 cs = cs0;

	1857 g = 0;

	1858 useFallback = FALSE;

	1859 }

	1860 break;

	1861 }

	1862 }

	1863

	1864 if(len != 0) {

	1865 if(len < 0) {

	1866 len = -len; /* fallback */

	1867 }

	1868 outLen = 0; /* count output bytes */

	1869

	1870 /* write SI if necessary (only for JIS7) */

	1871 if(pFromU2022State->g == 1 && g == 0) {

	1872 buffer[outLen++] = UCNV_SI;

	1873 pFromU2022State->g = 0;

	1874 }

	1875

	1876 /* write the designation sequence if necessary */

	1877 if(cs != pFromU2022State->cs[g]) {

	1878 int32_t escLen = escSeqCharsLen[cs];

	1879 uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);

	1880 outLen += escLen;

	1881 pFromU2022State->cs[g] = cs;

	1882

	1883 /* invalidate the choices[] */

	1884 choiceCount = 0;

	1885 }

	1886

	1887 /* write the shift sequence if necessary */

	1888 if(g != pFromU2022State->g) {

	1889 switch(g) {

	1890 /* case 0 handled before writing escapes */

	1891 case 1:

	1892 buffer[outLen++] = UCNV_SO;

	1893 pFromU2022State->g = 1;

	1894 break;

	1895 default: /* case 2 */

	1896 buffer[outLen++] = 0x1b;

	1897 buffer[outLen++] = 0x4e;

	1898 break;

	1899 /* no case 3: no SS3 in ISO-2022-JP-x */

	1900 }

	1901 }

	1902

	1903 /* write the output bytes */

	1904 if(len == 1) {

	1905 buffer[outLen++] = (char)targetValue;

	1906 } else /* len == 2 */ {

	1907 buffer[outLen++] = (char)(targetValue >> 8);

	1908 buffer[outLen++] = (char)targetValue;

	1909 }

	1910 } else {

	1911 /*

	1912 * if we cannot find the character after checking all codepages

	1913 * then this is an error

	1914 */

	1915 *err = U_INVALID_CHAR_FOUND;

	1916 cnv->fromUChar32=sourceChar;

	1917 break;

	1918 }

	1919

	1920 if(sourceChar == CR \|\| sourceChar == LF) {

	1921 /* reset the G2 state at the end of a line (conversion got us in to ASCII or JISX201 already) */

	1922 pFromU2022State->cs[2] = 0;

	1923 choiceCount = 0;

	1924 }

	1925

	1926 /* output outLen>0 bytes in buffer[] */

	1927 if(outLen == 1) {

	1928 *target++ = buffer[0];

	1929 if(offsets) {

	1930 offsets++ = (int32_t)(source - args->source - 1); / -1: kn own to be ASCII */

	1931 }

	1932 } else if(outLen == 2 && (target + 2) <= targetLimit) {

	1933 *target++ = buffer[0];

	1934 *target++ = buffer[1];

	1935 if(offsets) {

	1936 int32_t sourceIndex = (int32_t)(source - args->source - U16_ LENGTH(sourceChar));

	1937 *offsets++ = sourceIndex;

	1938 *offsets++ = sourceIndex;

	1939 }

	1940 } else {

	1941 fromUWriteUInt8(

	1942 cnv,

	1943 buffer, outLen,

	1944 &target, (const char *)targetLimit,

	1945 &offsets, (int32_t)(source - args->source - U16_LENGTH(sourc eChar)),

	1946 err);

	1947 if(U_FAILURE(*err)) {

	1948 break;

	1949 }

	1950 }

	1951 } /* end if(myTargetIndex<myTargetLength) */

	1952 else{

	1953 *err =U_BUFFER_OVERFLOW_ERROR;

	1954 break;

	1955 }

	1956

	1957 }/* end while(mySourceIndex<mySourceLength) */

	1958

	1959 /*

	1960 * the end of the input stream and detection of truncated input

	1961 * are handled by the framework, but for ISO-2022-JP conversion

	1962 * we need to be in ASCII mode at the very end

	1963 *

	1964 * conditions:

	1965 * successful

	1966 * in SO mode or not in ASCII mode

	1967 * end of input and no truncated input

	1968 */

	1969 if( U_SUCCESS(*err) &&

	1970 (pFromU2022State->g!=0 \|\| pFromU2022State->cs[0]!=ASCII) &&

	1971 args->flush && source>=sourceLimit && cnv->fromUChar32==0

	1972 ) {

	1973 int32_t sourceIndex;

	1974

	1975 outLen = 0;

	1976

	1977 if(pFromU2022State->g != 0) {

	1978 buffer[outLen++] = UCNV_SI;

	1979 pFromU2022State->g = 0;

	1980 }

	1981

	1982 if(pFromU2022State->cs[0] != ASCII) {

	1983 int32_t escLen = escSeqCharsLen[ASCII];

	1984 uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);

	1985 outLen += escLen;

	1986 pFromU2022State->cs[0] = (int8_t)ASCII;

	1987 }

	1988

	1989 /* get the source index of the last input character */

	1990 /*

	1991 * TODO this would be simpler and more reliable if we used a pair

	1992 * of sourceIndex/prevSourceIndex like in ucnvmbcs.c

	1993 * so that we could simply use the prevSourceIndex here;

	1994 * this code gives an incorrect result for the rare case of an unmatched

	1995 * trail surrogate that is alone in the last buffer of the text stream

	1996 */

	1997 sourceIndex=(int32_t)(source-args->source);

	1998 if(sourceIndex>0) {

	1999 --sourceIndex;

	2000 if( U16_IS_TRAIL(args->source[sourceIndex]) &&

	2001 (sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1]))

	2002 ) {

	2003 --sourceIndex;

	2004 }

	2005 } else {

	2006 sourceIndex=-1;

	2007 }

	2008

	2009 fromUWriteUInt8(

	2010 cnv,

	2011 buffer, outLen,

	2012 &target, (const char *)targetLimit,

	2013 &offsets, sourceIndex,

	2014 err);

	2015 }

	2016

	2017 /save the state and return /

	2018 args->source = source;

	2019 args->target = (char*)target;

	2020 }

	2021

	2022 /************* to unicode *****************/

	2023

	2024 static void

	2025 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

	2026 UErrorCode* err){

	2027 char tempBuf[2];

	2028 const char mySource = (char ) args->source;

	2029 UChar *myTarget = args->target;

	2030 const char *mySourceLimit = args->sourceLimit;

	2031 uint32_t targetUniChar = 0x0000;

	2032 uint32_t mySourceChar = 0x0000;

	2033 uint32_t tmpSourceChar = 0x0000;

	2034 UConverterDataISO2022* myData;

	2035 ISO2022State *pToU2022State;

	2036 StateEnum cs;

	2037

	2038 myData=(UConverterDataISO2022*)(args->converter->extraInfo);

	2039 pToU2022State = &myData->toU2022State;

	2040

	2041 if(myData->key != 0) {

	2042 /* continue with a partial escape sequence */

	2043 goto escape;

	2044 } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myT arget < args->targetLimit) {

	2045 /* continue with a partial double-byte character */

	2046 mySourceChar = args->converter->toUBytes[0];

	2047 args->converter->toULength = 0;

	2048 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];

	2049 targetUniChar = missingCharMarker;

	2050 goto getTrailByte;

	2051 }

	2052

	2053 while(mySource < mySourceLimit){

	2054

	2055 targetUniChar =missingCharMarker;

	2056

	2057 if(myTarget < args->targetLimit){

	2058

	2059 mySourceChar= (unsigned char) *mySource++;

	2060

	2061 switch(mySourceChar) {

	2062 case UCNV_SI:

	2063 if(myData->version==3) {

	2064 pToU2022State->g=0;

	2065 continue;

	2066 } else {

	2067 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */

	2068 myData->isEmptySegment = FALSE; /* reset this, we have a different error */

	2069 break;

	2070 }

	2071

	2072 case UCNV_SO:

	2073 if(myData->version==3) {

	2074 /* JIS7: switch to G1 half-width Katakana */

	2075 pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;

	2076 pToU2022State->g=1;

	2077 continue;

	2078 } else {

	2079 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */

	2080 myData->isEmptySegment = FALSE; /* reset this, we have a different error */

	2081 break;

	2082 }

	2083

	2084 case ESC_2022:

	2085 mySource--;

	2086 escape:

	2087 {

	2088 const char * mySourceBefore = mySource;

	2089 int8_t toULengthBefore = args->converter->toULength;

	2090

	2091 changeState_2022(args->converter,&(mySource),

	2092 mySourceLimit, ISO_2022_JP,err);

	2093

	2094 /* If in ISO-2022-JP only and we successully completed an es cape sequence, but previous segment was empty, create an error */

	2095 if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) & & myData->isEmptySegment) {

	2096 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	2097 args->converter->toUCallbackReason = UCNV_IRREGULAR;

	2098 args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));

	2099 }

	2100 }

	2101

	2102 /* invalid or illegal escape sequence */

	2103 if(U_FAILURE(*err)){

	2104 args->target = myTarget;

	2105 args->source = mySource;

	2106 myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */

	2107 return;

	2108 }

	2109 /* If we successfully completed an escape sequence, we begin a n ew segment, empty so far */

	2110 if(myData->key==0) {

	2111 myData->isEmptySegment = TRUE;

	2112 }

	2113 continue;

	2114

	2115 /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */

	2116

	2117 case CR:

	2118 /falls through/

	2119 case LF:

	2120 /* automatically reset to single-byte mode */

	2121 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU20 22State->cs[0] != JISX201) {

	2122 pToU2022State->cs[0] = (int8_t)ASCII;

	2123 }

	2124 pToU2022State->cs[2] = 0;

	2125 pToU2022State->g = 0;

	2126 /* falls through */

	2127 default:

	2128 /* convert one or two bytes */

	2129 myData->isEmptySegment = FALSE;

	2130 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];

	2131 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->v ersion==4 &&

	2132 !IS_JP_DBCS(cs)

	2133 ) {

	2134 /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */

	2135 targetUniChar = mySourceChar + (HWKANA_START - 0xa1);

	2136

	2137 /* return from a single-shift state to the previous one */

	2138 if(pToU2022State->g >= 2) {

	2139 pToU2022State->g=pToU2022State->prevG;

	2140 }

	2141 } else switch(cs) {

	2142 case ASCII:

	2143 if(mySourceChar <= 0x7f) {

	2144 targetUniChar = mySourceChar;

	2145 }

	2146 break;

	2147 case ISO8859_1:

	2148 if(mySourceChar <= 0x7f) {

	2149 targetUniChar = mySourceChar + 0x80;

	2150 }

	2151 /* return from a single-shift state to the previous one */

	2152 pToU2022State->g=pToU2022State->prevG;

	2153 break;

	2154 case ISO8859_7:

	2155 if(mySourceChar <= 0x7f) {

	2156 /* convert mySourceChar+0x80 to use a normal 8-bit table */

	2157 targetUniChar =

	2158 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(

	2159 myData->myConverterArray[cs],

	2160 mySourceChar + 0x80);

	2161 }

	2162 /* return from a single-shift state to the previous one */

	2163 pToU2022State->g=pToU2022State->prevG;

	2164 break;

	2165 case JISX201:

	2166 if(mySourceChar <= 0x7f) {

	2167 targetUniChar = jisx201ToU(mySourceChar);

	2168 }

	2169 break;

	2170 case HWKANA_7BIT:

	2171 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {

	2172 /* 7-bit halfwidth Katakana */

	2173 targetUniChar = mySourceChar + (HWKANA_START - 0x21);

	2174 }

	2175 break;

	2176 default:

	2177 /* G0 DBCS */

	2178 if(mySource < mySourceLimit) {

	2179 int leadIsOk, trailIsOk;

	2180 uint8_t trailByte;

	2181 getTrailByte:

	2182 trailByte = (uint8_t)*mySource;

	2183 /*

	2184 * Ticket 5691: consistent illegal sequences:

	2185 * - We include at least the first byte in the illegal s equence.

	2186 * - If any of the non-initial bytes could be the start of a character,

	2187 * we stop the illegal sequence before the first one o f those.

	2188 *

	2189 * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is

	2190 * an ESC/SO/SI, we report only the first byte as the il legal sequence.

	2191 * Otherwise we convert or report the pair of bytes.

	2192 */

	2193 leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x2 1);

	2194 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ;

	2195 if (leadIsOk && trailIsOk) {

	2196 ++mySource;

	2197 tmpSourceChar = (mySourceChar << 8) \| trailByte;

	2198 if(cs == JISX208) {

	2199 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf);

	2200 mySourceChar = tmpSourceChar;

	2201 } else {

	2202 /* Copy before we modify tmpSourceChar so toUnic odeCallback() sees the correct bytes. */

	2203 mySourceChar = tmpSourceChar;

	2204 if (cs == KSC5601) {

	2205 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */

	2206 }

	2207 tempBuf[0] = (char)(tmpSourceChar >> 8);

	2208 tempBuf[1] = (char)(tmpSourceChar);

	2209 }

	2210 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE);

	2211 } else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {

	2212 /* report a pair of illegal bytes if the second byte is not a DBCS starter */

	2213 ++mySource;

	2214 /* add another bit so that the code below writes 2 b ytes in case of error */

	2215 mySourceChar = 0x10000 \| (mySourceChar << 8) \| trail Byte;

	2216 }

	2217 } else {

	2218 args->converter->toUBytes[0] = (uint8_t)mySourceChar;

	2219 args->converter->toULength = 1;

	2220 goto endloop;

	2221 }

	2222 } /* End of inner switch */

	2223 break;

	2224 } /* End of outer switch */

	2225 if(targetUniChar < (missingCharMarker-1/0xfffe/)){

	2226 if(args->offsets){

	2227 args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

	2228 }

	2229 *(myTarget++)=(UChar)targetUniChar;

	2230 }

	2231 else if(targetUniChar > missingCharMarker){

	2232 /* disassemble the surrogate pair and write to output*/

	2233 targetUniChar-=0x0010000;

	2234 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));

	2235 if(args->offsets){

	2236 args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

	2237 }

	2238 ++myTarget;

	2239 if(myTarget< args->targetLimit){

	2240 *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

	2241 if(args->offsets){

	2242 args->offsets[myTarget - args->target] = (int32_t)(mySou rce - args->source - (mySourceChar <= 0xff ? 1 : 2));

	2243 }

	2244 ++myTarget;

	2245 }else{

	2246 args->converter->UCharErrorBuffer[args->converter->UCharErro rBufferLength++]=

	2247 (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)) ;

	2248 }

	2249

	2250 }

	2251 else{

	2252 /* Call the callback function*/

	2253 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err );

	2254 break;

	2255 }

	2256 }

	2257 else{ /* goes with "if(myTarget < args->targetLimit)" way up near to p of function */

	2258 *err =U_BUFFER_OVERFLOW_ERROR;

	2259 break;

	2260 }

	2261 }

	2262 endloop:

	2263 args->target = myTarget;

	2264 args->source = mySource;

	2265 }

	2266

	2267

	2268 /***************************************************************

	2269 * Rules for ISO-2022-KR encoding

	2270 * i) The KSC5601 designator sequence should appear only once in a file,

	2271 * at the beginning of a line before any KSC5601 characters. This usually

	2272 * means that it appears by itself on the first line of the file

	2273 * ii) There are only 2 shifting sequences SO to shift into double byte mode

	2274 * and SI to shift into single byte mode

	2275 */

	2276 static void

	2277 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){

	2278

	2279 UConverter* saveConv = args->converter;

	2280 UConverterDataISO2022 myConverterData=(UConverterDataISO2022)saveConv->ext raInfo;

	2281 args->converter=myConverterData->currentConverter;

	2282

	2283 myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;

	2284 ucnv_MBCSFromUnicodeWithOffsets(args,err);

	2285 saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;

	2286

	2287 if(*err == U_BUFFER_OVERFLOW_ERROR) {

	2288 if(myConverterData->currentConverter->charErrorBufferLength > 0) {

	2289 uprv_memcpy(

	2290 saveConv->charErrorBuffer,

	2291 myConverterData->currentConverter->charErrorBuffer,

	2292 myConverterData->currentConverter->charErrorBufferLength);

	2293 }

	2294 saveConv->charErrorBufferLength = myConverterData->currentConverter->cha rErrorBufferLength;

	2295 myConverterData->currentConverter->charErrorBufferLength = 0;

	2296 }

	2297 args->converter=saveConv;

	2298 }

	2299

	2300 static void

	2301 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args , UErrorCode* err){

	2302

	2303 const UChar *source = args->source;

	2304 const UChar *sourceLimit = args->sourceLimit;

	2305 unsigned char target = (unsigned char ) args->target;

	2306 unsigned char targetLimit = (unsigned char ) args->targetLimit;

	2307 int32_t* offsets = args->offsets;

	2308 uint32_t targetByteUnit = 0x0000;

	2309 UChar32 sourceChar = 0x0000;

	2310 UBool isTargetByteDBCS;

	2311 UBool oldIsTargetByteDBCS;

	2312 UConverterDataISO2022 *converterData;

	2313 UConverterSharedData* sharedData;

	2314 UBool useFallback;

	2315 int32_t length =0;

	2316

	2317 converterData=(UConverterDataISO2022*)args->converter->extraInfo;

	2318 /* if the version is 1 then the user is requesting

	2319 * conversion with ibm-25546 pass the arguments to

	2320 * MBCS converter and return

	2321 */

	2322 if(converterData->version==1){

	2323 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);

	2324 return;

	2325 }

	2326

	2327 /* initialize data */

	2328 sharedData = converterData->currentConverter->sharedData;

	2329 useFallback = args->converter->useFallback;

	2330 isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;

	2331 oldIsTargetByteDBCS = isTargetByteDBCS;

	2332

	2333 isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus;

	2334 if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {

	2335 goto getTrail;

	2336 }

	2337 while(source < sourceLimit){

	2338

	2339 targetByteUnit = missingCharMarker;

	2340

	2341 if(target < (unsigned char*) args->targetLimit){

	2342 sourceChar = *source++;

	2343

	2344 /* do not convert SO/SI/ESC */

	2345 if(IS_2022_CONTROL(sourceChar)) {

	2346 /* callback(illegal) */

	2347 *err=U_ILLEGAL_CHAR_FOUND;

	2348 args->converter->fromUChar32=sourceChar;

	2349 break;

	2350 }

	2351

	2352 length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByte Unit,useFallback,MBCS_OUTPUT_2);

	2353 if(length < 0) {

	2354 length = -length; /* fallback */

	2355 }

	2356 /* only DBCS or SBCS characters are expected*/

	2357 /* DB characters with high bit set to 1 are expected */

	2358 if( length > 2 \|\| length==0 \|\|

	2359 (length == 1 && targetByteUnit > 0x7f) \|\|

	2360 (length == 2 &&

	2361 ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) \|\|

	2362 (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))

	2363 ) {

	2364 targetByteUnit=missingCharMarker;

	2365 }

	2366 if (targetByteUnit != missingCharMarker){

	2367

	2368 oldIsTargetByteDBCS = isTargetByteDBCS;

	2369 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);

	2370 /* append the shift sequence */

	2371 if (oldIsTargetByteDBCS != isTargetByteDBCS ){

	2372

	2373 if (isTargetByteDBCS)

	2374 *target++ = UCNV_SO;

	2375 else

	2376 *target++ = UCNV_SI;

	2377 if(offsets)

	2378 *(offsets++) = (int32_t)(source - args->source-1);

	2379 }

	2380 /* write the targetUniChar to target */

	2381 if(targetByteUnit <= 0x00FF){

	2382 if( target < targetLimit){

	2383 *(target++) = (unsigned char) targetByteUnit;

	2384 if(offsets){

	2385 *(offsets++) = (int32_t)(source - args->source-1);

	2386 }

	2387

	2388 }else{

	2389 args->converter->charErrorBuffer[args->converter->charEr rorBufferLength++] = (unsigned char) (targetByteUnit);

	2390 *err = U_BUFFER_OVERFLOW_ERROR;

	2391 }

	2392 }else{

	2393 if(target < targetLimit){

	2394 *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80) ;

	2395 if(offsets){

	2396 *(offsets++) = (int32_t)(source - args->source-1);

	2397 }

	2398 if(target < targetLimit){

	2399 *(target++) =(unsigned char) (targetByteUnit -0x80);

	2400 if(offsets){

	2401 *(offsets++) = (int32_t)(source - args->source-1 );

	2402 }

	2403 }else{

	2404 args->converter->charErrorBuffer[args->converter->ch arErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);

	2405 *err = U_BUFFER_OVERFLOW_ERROR;

	2406 }

	2407 }else{

	2408 args->converter->charErrorBuffer[args->converter->charEr rorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);

	2409 args->converter->charErrorBuffer[args->converter->charEr rorBufferLength++] = (unsigned char) (targetByteUnit-0x80);

	2410 *err = U_BUFFER_OVERFLOW_ERROR;

	2411 }

	2412 }

	2413

	2414 }

	2415 else{

	2416 /* oops.. the code point is unassingned

	2417 * set the error and reason

	2418 */

	2419

	2420 /check if the char is a First surrogate/

	2421 if(UTF_IS_SURROGATE(sourceChar)) {

	2422 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

	2423 getTrail:

	2424 /look ahead to find the trail surrogate/

	2425 if(source < sourceLimit) {

	2426 /* test the following code unit */

	2427 UChar trail=(UChar) *source;

	2428 if(UTF_IS_SECOND_SURROGATE(trail)) {

	2429 source++;

	2430 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trai l);

	2431 *err = U_INVALID_CHAR_FOUND;

	2432 /* convert this surrogate code point */

	2433 /* exit this condition tree */

	2434 } else {

	2435 /* this is an unmatched lead code unit (1st surr ogate) */

	2436 /* callback(illegal) */

	2437 *err=U_ILLEGAL_CHAR_FOUND;

	2438 }

	2439 } else {

	2440 /* no more input */

	2441 *err = U_ZERO_ERROR;

	2442 }

	2443 } else {

	2444 /* this is an unmatched trail code unit (2nd surrogate) */

	2445 /* callback(illegal) */

	2446 *err=U_ILLEGAL_CHAR_FOUND;

	2447 }

	2448 } else {

	2449 /* callback(unassigned) for a BMP code point */

	2450 *err = U_INVALID_CHAR_FOUND;

	2451 }

	2452

	2453 args->converter->fromUChar32=sourceChar;

	2454 break;

	2455 }

	2456 } /* end if(myTargetIndex<myTargetLength) */

	2457 else{

	2458 *err =U_BUFFER_OVERFLOW_ERROR;

	2459 break;

	2460 }

	2461

	2462 }/* end while(mySourceIndex<mySourceLength) */

	2463

	2464 /*

	2465 * the end of the input stream and detection of truncated input

	2466 * are handled by the framework, but for ISO-2022-KR conversion

	2467 * we need to be in ASCII mode at the very end

	2468 *

	2469 * conditions:

	2470 * successful

	2471 * not in ASCII mode

	2472 * end of input and no truncated input

	2473 */

	2474 if( U_SUCCESS(*err) &&

	2475 isTargetByteDBCS &&

	2476 args->flush && source>=sourceLimit && args->converter->fromUChar32==0

	2477 ) {

	2478 int32_t sourceIndex;

	2479

	2480 /* we are switching to ASCII */

	2481 isTargetByteDBCS=FALSE;

	2482

	2483 /* get the source index of the last input character */

	2484 /*

	2485 * TODO this would be simpler and more reliable if we used a pair

	2486 * of sourceIndex/prevSourceIndex like in ucnvmbcs.c

	2487 * so that we could simply use the prevSourceIndex here;

	2488 * this code gives an incorrect result for the rare case of an unmatched

	2489 * trail surrogate that is alone in the last buffer of the text stream

	2490 */

	2491 sourceIndex=(int32_t)(source-args->source);

	2492 if(sourceIndex>0) {

	2493 --sourceIndex;

	2494 if( U16_IS_TRAIL(args->source[sourceIndex]) &&

	2495 (sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1]))

	2496 ) {

	2497 --sourceIndex;

	2498 }

	2499 } else {

	2500 sourceIndex=-1;

	2501 }

	2502

	2503 fromUWriteUInt8(

	2504 args->converter,

	2505 SHIFT_IN_STR, 1,

	2506 &target, (const char *)targetLimit,

	2507 &offsets, sourceIndex,

	2508 err);

	2509 }

	2510

	2511 /save the state and return /

	2512 args->source = source;

	2513 args->target = (char*)target;

	2514 args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;

	2515 }

	2516

	2517 /********************** To Unicode *************************************/

	2518

	2519 static void

	2520 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args ,

	2521 UErrorCode* err){

	2522 char const* sourceStart;

	2523 UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extr aInfo);

	2524

	2525 UConverterToUnicodeArgs subArgs;

	2526 int32_t minArgsSize;

	2527

	2528 /* set up the subconverter arguments */

	2529 if(args->size<sizeof(UConverterToUnicodeArgs)) {

	2530 minArgsSize = args->size;

	2531 } else {

	2532 minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);

	2533 }

	2534

	2535 uprv_memcpy(&subArgs, args, minArgsSize);

	2536 subArgs.size = (uint16_t)minArgsSize;

	2537 subArgs.converter = myData->currentConverter;

	2538

	2539 /* remember the original start of the input for offsets */

	2540 sourceStart = args->source;

	2541

	2542 if(myData->key != 0) {

	2543 /* continue with a partial escape sequence */

	2544 goto escape;

	2545 }

	2546

	2547 while(U_SUCCESS(*err) && args->source < args->sourceLimit) {

	2548 /Find the end of the buffer e.g : Next Escape Seq \| end of Buffer/

	2549 subArgs.source = args->source;

	2550 subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceL imit, args->flush);

	2551 if(subArgs.source != subArgs.sourceLimit) {

	2552 /*

	2553 * get the current partial byte sequence

	2554 *

	2555 * it needs to be moved between the public and the subconverter

	2556 * so that the conversion framework, which only sees the public

	2557 * converter, can handle truncated and illegal input etc.

	2558 */

	2559 if(args->converter->toULength > 0) {

	2560 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUByt es, args->converter->toULength);

	2561 }

	2562 subArgs.converter->toULength = args->converter->toULength;

	2563

	2564 /*

	2565 * Convert up to the end of the input, or to before the next escape character.

	2566 * Does not handle conversion extensions because the preToU[] state etc.

	2567 * is not copied.

	2568 */

	2569 ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);

	2570

	2571 if(args->offsets != NULL && sourceStart != args->source) {

	2572 /* update offsets to base them on the actual start of the input */

	2573 int32_t *offsets = args->offsets;

	2574 UChar *target = args->target;

	2575 int32_t delta = (int32_t)(args->source - sourceStart);

	2576 while(target < subArgs.target) {

	2577 if(*offsets >= 0) {

	2578 *offsets += delta;

	2579 }

	2580 ++offsets;

	2581 ++target;

	2582 }

	2583 }

	2584 args->source = subArgs.source;

	2585 args->target = subArgs.target;

	2586 args->offsets = subArgs.offsets;

	2587

	2588 /* copy input/error/overflow buffers */

	2589 if(subArgs.converter->toULength > 0) {

	2590 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUByt es, subArgs.converter->toULength);

	2591 }

	2592 args->converter->toULength = subArgs.converter->toULength;

	2593

	2594 if(*err == U_BUFFER_OVERFLOW_ERROR) {

	2595 if(subArgs.converter->UCharErrorBufferLength > 0) {

	2596 uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.conve rter->UCharErrorBuffer,

	2597 subArgs.converter->UCharErrorBufferLength);

	2598 }

	2599 args->converter->UCharErrorBufferLength=subArgs.converter->UChar ErrorBufferLength;

	2600 subArgs.converter->UCharErrorBufferLength = 0;

	2601 }

	2602 }

	2603

	2604 if (U_FAILURE(*err) \|\| (args->source == args->sourceLimit)) {

	2605 return;

	2606 }

	2607

	2608 escape:

	2609 changeState_2022(args->converter,

	2610 &(args->source),

	2611 args->sourceLimit,

	2612 ISO_2022_KR,

	2613 err);

	2614 }

	2615 }

	2616

	2617 static void

	2618 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

	2619 UErrorCode* err){

	2620 char tempBuf[2];

	2621 const char mySource = ( char ) args->source;

	2622 UChar *myTarget = args->target;

	2623 const char *mySourceLimit = args->sourceLimit;

	2624 UChar32 targetUniChar = 0x0000;

	2625 UChar mySourceChar = 0x0000;

	2626 UConverterDataISO2022* myData;

	2627 UConverterSharedData* sharedData ;

	2628 UBool useFallback;

	2629

	2630 myData=(UConverterDataISO2022*)(args->converter->extraInfo);

	2631 if(myData->version==1){

	2632 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);

	2633 return;

	2634 }

	2635

	2636 /* initialize state */

	2637 sharedData = myData->currentConverter->sharedData;

	2638 useFallback = args->converter->useFallback;

	2639

	2640 if(myData->key != 0) {

	2641 /* continue with a partial escape sequence */

	2642 goto escape;

	2643 } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myT arget < args->targetLimit) {

	2644 /* continue with a partial double-byte character */

	2645 mySourceChar = args->converter->toUBytes[0];

	2646 args->converter->toULength = 0;

	2647 goto getTrailByte;

	2648 }

	2649

	2650 while(mySource< mySourceLimit){

	2651

	2652 if(myTarget < args->targetLimit){

	2653

	2654 mySourceChar= (unsigned char) *mySource++;

	2655

	2656 if(mySourceChar==UCNV_SI){

	2657 myData->toU2022State.g = 0;

	2658 if (myData->isEmptySegment) {

	2659 myData->isEmptySegment = FALSE; /* we are handling it, r eset to avoid future spurious errors */

	2660 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	2661 args->converter->toUCallbackReason = UCNV_IRREGULAR;

	2662 args->converter->toUBytes[0] = (uint8_t)mySourceChar;

	2663 args->converter->toULength = 1;

	2664 args->target = myTarget;

	2665 args->source = mySource;

	2666 return;

	2667 }

	2668 /consume the source /

	2669 continue;

	2670 }else if(mySourceChar==UCNV_SO){

	2671 myData->toU2022State.g = 1;

	2672 myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */

	2673 /consume the source /

	2674 continue;

	2675 }else if(mySourceChar==ESC_2022){

	2676 mySource--;

	2677 escape:

	2678 myData->isEmptySegment = FALSE; /* Any invalid ESC sequences wil l be detected separately, so just reset this */

	2679 changeState_2022(args->converter,&(mySource),

	2680 mySourceLimit, ISO_2022_KR, err);

	2681 if(U_FAILURE(*err)){

	2682 args->target = myTarget;

	2683 args->source = mySource;

	2684 return;

	2685 }

	2686 continue;

	2687 }

	2688

	2689 myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */

	2690 if(myData->toU2022State.g == 1) {

	2691 if(mySource < mySourceLimit) {

	2692 int leadIsOk, trailIsOk;

	2693 uint8_t trailByte;

	2694 getTrailByte:

	2695 targetUniChar = missingCharMarker;

	2696 trailByte = (uint8_t)*mySource;

	2697 /*

	2698 * Ticket 5691: consistent illegal sequences:

	2699 * - We include at least the first byte in the illegal seque nce.

	2700 * - If any of the non-initial bytes could be the start of a character,

	2701 * we stop the illegal sequence before the first one of th ose.

	2702 *

	2703 * In ISO-2022 DBCS, if the second byte is in the 21..7e ran ge or is

	2704 * an ESC/SO/SI, we report only the first byte as the illega l sequence.

	2705 * Otherwise we convert or report the pair of bytes.

	2706 */

	2707 leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);

	2708 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);

	2709 if (leadIsOk && trailIsOk) {

	2710 ++mySource;

	2711 tempBuf[0] = (char)(mySourceChar + 0x80);

	2712 tempBuf[1] = (char)(trailByte + 0x80);

	2713 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);

	2714 mySourceChar = (mySourceChar << 8) \| trailByte;

	2715 } else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {

	2716 /* report a pair of illegal bytes if the second byte is not a DBCS starter */

	2717 ++mySource;

	2718 /* add another bit so that the code below writes 2 bytes in case of error */

	2719 mySourceChar = 0x10000 \| (mySourceChar << 8) \| trailByte ;

	2720 }

	2721 } else {

	2722 args->converter->toUBytes[0] = (uint8_t)mySourceChar;

	2723 args->converter->toULength = 1;

	2724 break;

	2725 }

	2726 }

	2727 else if(mySourceChar <= 0x7f) {

	2728 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);

	2729 } else {

	2730 targetUniChar = 0xffff;

	2731 }

	2732 if(targetUniChar < 0xfffe){

	2733 if(args->offsets) {

	2734 args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

	2735 }

	2736 *(myTarget++)=(UChar)targetUniChar;

	2737 }

	2738 else {

	2739 /* Call the callback function*/

	2740 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err );

	2741 break;

	2742 }

	2743 }

	2744 else{

	2745 *err =U_BUFFER_OVERFLOW_ERROR;

	2746 break;

	2747 }

	2748 }

	2749 args->target = myTarget;

	2750 args->source = mySource;

	2751 }

	2752

	2753 /************************* END ISO2022-KR *******************************/

	2754

	2755 /************************* ISO-2022-CN *******************************

	2756 *

	2757 * Rules for ISO-2022-CN Encoding:

	2758 * i) The designator sequence must appear once on a line before any instance

	2759 * of character set it designates.

	2760 * ii) If two lines contain characters from the same character set, both lines

	2761 * must include the designator sequence.

	2762 * iii) Once the designator sequence is known, a shifting sequence has to be found

	2763 * to invoke the shifting

	2764 * iv) All lines start in ASCII and end in ASCII.

	2765 * v) Four shifting sequences are employed for this purpose:

	2766 *

	2767 * Sequence ASCII Eq Charsets

	2768 * ---------- ------- ---------

	2769 * SI <SI> US-ASCII

	2770 * SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165

	2771 * SS2 <ESC>N CNS-11643-1992 Plane 2

	2772 * SS3 <ESC>O CNS-11643-1992 Planes 3-7

	2773 *

	2774 * vi)

	2775 * SOdesignator : ESC "$" ")" finalchar_for_SO

	2776 * SS2designator : ESC "$" "*" finalchar_for_SS2

	2777 * SS3designator : ESC "$" "+" finalchar_for_SS3

	2778 *

	2779 * ESC $ ) A Indicates the bytes following SO are Chinese

	2780 * characters as defined in GB 2312-80, until

	2781 * another SOdesignation appears

	2782 *

	2783 *

	2784 * ESC $ ) E Indicates the bytes following SO are as defined

	2785 * in ISO-IR-165 (for details, see section 2.1),

	2786 * until another SOdesignation appears

	2787 *

	2788 * ESC $ ) G Indicates the bytes following SO are as defined

	2789 * in CNS 11643-plane-1, until another

	2790 * SOdesignation appears

	2791 *

	2792 * ESC $ * H Indicates the two bytes immediately following

	2793 * SS2 is a Chinese character as defined in CNS

	2794 * 11643-plane-2, until another SS2designation

	2795 * appears

	2796 * (Meaning <ESC>N must precede every 2 byte

	2797 * sequence.)

	2798 *

	2799 * ESC $ + I Indicates the immediate two bytes following SS3

	2800 * is a Chinese character as defined in CNS

	2801 * 11643-plane-3, until another SS3designation

	2802 * appears

	2803 * (Meaning <ESC>O must precede every 2 byte

	2804 * sequence.)

	2805 *

	2806 * ESC $ + J Indicates the immediate two bytes following SS3

	2807 * is a Chinese character as defined in CNS

	2808 * 11643-plane-4, until another SS3designation

	2809 * appears

	2810 * (In English: <ESC>O must precede every 2 byte

	2811 * sequence.)

	2812 *

	2813 * ESC $ + K Indicates the immediate two bytes following SS3

	2814 * is a Chinese character as defined in CNS

	2815 * 11643-plane-5, until another SS3designation

	2816 * appears

	2817 *

	2818 * ESC $ + L Indicates the immediate two bytes following SS3

	2819 * is a Chinese character as defined in CNS

	2820 * 11643-plane-6, until another SS3designation

	2821 * appears

	2822 *

	2823 * ESC $ + M Indicates the immediate two bytes following SS3

	2824 * is a Chinese character as defined in CNS

	2825 * 11643-plane-7, until another SS3designation

	2826 * appears

	2827 *

	2828 * As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and

	2829 * has its own designation information before any Chinese characters

	2830 * appear

	2831 *

	2832 */

	2833

	/*
	 * ISO-2022-CN designator escape sequences, 4 bytes each plus the NUL.
	 * They are defined as arrays (not pointers to literals) to make the strings
	 * truly read-only, and spelled with hex escapes rather than character
	 * literals. The trailing comments give the ASCII reading of the bytes.
	 */
	static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";             /* ESC $ ) A */
	static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";             /* ESC $ ) E */
	static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
	static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
	static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
	static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
	static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
	static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
	static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */

	2844

	2845 /******************** ISO2022-CN Data ************************/

	2846 static const char* const escSeqCharsCN[10] ={

	2847 SHIFT_IN_STR, /* ASCII */

	2848 GB_2312_80_STR,

	2849 ISO_IR_165_STR,

	2850 CNS_11643_1992_Plane_1_STR,

	2851 CNS_11643_1992_Plane_2_STR,

	2852 CNS_11643_1992_Plane_3_STR,

	2853 CNS_11643_1992_Plane_4_STR,

	2854 CNS_11643_1992_Plane_5_STR,

	2855 CNS_11643_1992_Plane_6_STR,

	2856 CNS_11643_1992_Plane_7_STR

	2857 };

	2858

	2859 static void

	2860 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args , UErrorCode* err){

	2861 UConverter *cnv = args->converter;

	2862 UConverterDataISO2022 *converterData;

	2863 ISO2022State *pFromU2022State;

	2864 uint8_t target = (uint8_t ) args->target;

	2865 const uint8_t targetLimit = (const uint8_t ) args->targetLimit;

	2866 const UChar* source = args->source;

	2867 const UChar* sourceLimit = args->sourceLimit;

	2868 int32_t* offsets = args->offsets;

	2869 UChar32 sourceChar;

	2870 char buffer[8];

	2871 int32_t len;

	2872 int8_t choices[3];

	2873 int32_t choiceCount;

	2874 uint32_t targetValue = 0;

	2875 UBool useFallback;

	2876

	2877 /* set up the state */

	2878 converterData = (UConverterDataISO2022*)cnv->extraInfo;

	2879 pFromU2022State = &converterData->fromU2022State;

	2880

	2881 choiceCount = 0;

	2882

	2883 /* check if the last codepoint of previous buffer was a lead surrogate*/

	2884 if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {

	2885 goto getTrail;

	2886 }

	2887

	2888 while( source < sourceLimit){

	2889 if(target < targetLimit){

	2890

	2891 sourceChar = *(source++);

	2892 /check if the char is a First surrogate/

	2893 if(UTF_IS_SURROGATE(sourceChar)) {

	2894 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

	2895 getTrail:

	2896 /look ahead to find the trail surrogate/

	2897 if(source < sourceLimit) {

	2898 /* test the following code unit */

	2899 UChar trail=(UChar) *source;

	2900 if(UTF_IS_SECOND_SURROGATE(trail)) {

	2901 source++;

	2902 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

	2903 cnv->fromUChar32=0x00;

	2904 /* convert this supplementary code point */

	2905 /* exit this condition tree */

	2906 } else {

	2907 /* this is an unmatched lead code unit (1st surrogat e) */

	2908 /* callback(illegal) */

	2909 *err=U_ILLEGAL_CHAR_FOUND;

	2910 cnv->fromUChar32=sourceChar;

	2911 break;

	2912 }

	2913 } else {

	2914 /* no more input */

	2915 cnv->fromUChar32=sourceChar;

	2916 break;

	2917 }

	2918 } else {

	2919 /* this is an unmatched trail code unit (2nd surrogate) */

	2920 /* callback(illegal) */

	2921 *err=U_ILLEGAL_CHAR_FOUND;

	2922 cnv->fromUChar32=sourceChar;

	2923 break;

	2924 }

	2925 }

	2926

	2927 /* do the conversion */

	2928 if(sourceChar <= 0x007f ){

	2929 /* do not convert SO/SI/ESC */

	2930 if(IS_2022_CONTROL(sourceChar)) {

	2931 /* callback(illegal) */

	2932 *err=U_ILLEGAL_CHAR_FOUND;

	2933 cnv->fromUChar32=sourceChar;

	2934 break;

	2935 }

	2936

	2937 /* US-ASCII */

	2938 if(pFromU2022State->g == 0) {

	2939 buffer[0] = (char)sourceChar;

	2940 len = 1;

	2941 } else {

	2942 buffer[0] = UCNV_SI;

	2943 buffer[1] = (char)sourceChar;

	2944 len = 2;

	2945 pFromU2022State->g = 0;

	2946 choiceCount = 0;

	2947 }

	2948 if(sourceChar == CR \|\| sourceChar == LF) {

	2949 /* reset the state at the end of a line */

	2950 uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));

	2951 choiceCount = 0;

	2952 }

	2953 }

	2954 else{

	2955 /* convert U+0080..U+10ffff */

	2956 int32_t i;

	2957 int8_t cs, g;

	2958

	2959 if(choiceCount == 0) {

	2960 /* try the current SO/G1 converter first */

	2961 choices[0] = pFromU2022State->cs[1];

	2962

	2963 /* default to GB2312_1 if none is designated yet */

	2964 if(choices[0] == 0) {

	2965 choices[0] = GB2312_1;

	2966 }

	2967

	2968 if(converterData->version == 0) {

	2969 /* ISO-2022-CN */

	2970

	2971 /* try the other SO/G1 converter; a CNS_11643_1 lookup m ay result in any plane */

	2972 if(choices[0] == GB2312_1) {

	2973 choices[1] = (int8_t)CNS_11643_1;

	2974 } else {

	2975 choices[1] = (int8_t)GB2312_1;

	2976 }

	2977

	2978 choiceCount = 2;

	2979 } else if (converterData->version == 1) {

	2980 /* ISO-2022-CN-EXT */

	2981

	2982 /* try one of the other converters */

	2983 switch(choices[0]) {

	2984 case GB2312_1:

	2985 choices[1] = (int8_t)CNS_11643_1;

	2986 choices[2] = (int8_t)ISO_IR_165;

	2987 break;

	2988 case ISO_IR_165:

	2989 choices[1] = (int8_t)GB2312_1;

	2990 choices[2] = (int8_t)CNS_11643_1;

	2991 break;

	2992 default: /* CNS_11643_x */

	2993 choices[1] = (int8_t)GB2312_1;

	2994 choices[2] = (int8_t)ISO_IR_165;

	2995 break;

	2996 }

	2997

	2998 choiceCount = 3;

	2999 } else {

	3000 choices[0] = (int8_t)CNS_11643_1;

	3001 choices[1] = (int8_t)GB2312_1;

	3002 }

	3003 }

	3004

	3005 cs = g = 0;

	3006 /*

	3007 * len==0: no mapping found yet

	3008 * len<0: found a fallback result: continue looking for a roundt rip but no further fallbacks

	3009 * len>0: found a roundtrip result, done

	3010 */

	3011 len = 0;

	3012 /*

	3013 * We will turn off useFallback after finding a fallback,

	3014 * but we still get fallbacks from PUA code points as usual.

	3015 * Therefore, we will also need to check that we don't overwrite

	3016 * an early fallback with a later one.

	3017 */

	3018 useFallback = cnv->useFallback;

	3019

	3020 for(i = 0; i < choiceCount && len <= 0; ++i) {

	3021 int8_t cs0 = choices[i];

	3022 if(cs0 > 0) {

	3023 uint32_t value;

	3024 int32_t len2;

	3025 if(cs0 >= CNS_11643_0) {

	3026 len2 = MBCS_FROM_UCHAR32_ISO2022(

	3027 converterData->myConverterArray[CNS_1164 3],

	3028 sourceChar,

	3029 &value,

	3030 useFallback,

	3031 MBCS_OUTPUT_3);

	3032 if(len2 == 3 \|\| (len2 == -3 && len == 0)) {

	3033 targetValue = value;

	3034 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80 );

	3035 if(len2 >= 0) {

	3036 len = 2;

	3037 } else {

	3038 len = -2;

	3039 useFallback = FALSE;

	3040 }

	3041 if(cs == CNS_11643_1) {

	3042 g = 1;

	3043 } else if(cs == CNS_11643_2) {

	3044 g = 2;

	3045 } else /* plane 3..7 */ if(converterData->versio n == 1) {

	3046 g = 3;

	3047 } else {

	3048 /* ISO-2022-CN (without -EXT) does not suppo rt plane 3..7 */

	3049 len = 0;

	3050 }

	3051 }

	3052 } else {

	3053 /* GB2312_1 or ISO-IR-165 */

	3054 len2 = MBCS_FROM_UCHAR32_ISO2022(

	3055 converterData->myConverterArray[cs0],

	3056 sourceChar,

	3057 &value,

	3058 useFallback,

	3059 MBCS_OUTPUT_2);

	3060 if(len2 == 2 \|\| (len2 == -2 && len == 0)) {

	3061 targetValue = value;

	3062 len = len2;

	3063 cs = cs0;

	3064 g = 1;

	3065 useFallback = FALSE;

	3066 }

	3067 }

	3068 }

	3069 }

	3070

	3071 if(len != 0) {

	3072 len = 0; /* count output bytes; it must have been abs(len) = = 2 */

	3073

	3074 /* write the designation sequence if necessary */

	3075 if(cs != pFromU2022State->cs[g]) {

	3076 if(cs < CNS_11643) {

	3077 uprv_memcpy(buffer, escSeqCharsCN[cs], 4);

	3078 } else {

	3079 uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);

	3080 }

	3081 len = 4;

	3082 pFromU2022State->cs[g] = cs;

	3083 if(g == 1) {

	3084 /* changing the SO/G1 charset invalidates the choice s[] */

	3085 choiceCount = 0;

	3086 }

	3087 }

	3088

	3089 /* write the shift sequence if necessary */

	3090 if(g != pFromU2022State->g) {

	3091 switch(g) {

	3092 case 1:

	3093 buffer[len++] = UCNV_SO;

	3094

	3095 /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */

	3096 pFromU2022State->g = 1;

	3097 break;

	3098 case 2:

	3099 buffer[len++] = 0x1b;

	3100 buffer[len++] = 0x4e;

	3101 break;

	3102 default: /* case 3 */

	3103 buffer[len++] = 0x1b;

	3104 buffer[len++] = 0x4f;

	3105 break;

	3106 }

	3107 }

	3108

	3109 /* write the two output bytes */

	3110 buffer[len++] = (char)(targetValue >> 8);

	3111 buffer[len++] = (char)targetValue;

	3112 } else {

	3113 /* if we cannot find the character after checking all codepa ges

	3114 * then this is an error

	3115 */

	3116 *err = U_INVALID_CHAR_FOUND;

	3117 cnv->fromUChar32=sourceChar;

	3118 break;

	3119 }

	3120 }

	3121

	3122 /* output len>0 bytes in buffer[] */

	3123 if(len == 1) {

	3124 *target++ = buffer[0];

	3125 if(offsets) {

	3126 offsets++ = (int32_t)(source - args->source - 1); / -1: kn own to be ASCII */

	3127 }

	3128 } else if(len == 2 && (target + 2) <= targetLimit) {

	3129 *target++ = buffer[0];

	3130 *target++ = buffer[1];

	3131 if(offsets) {

	3132 int32_t sourceIndex = (int32_t)(source - args->source - U16_ LENGTH(sourceChar));

	3133 *offsets++ = sourceIndex;

	3134 *offsets++ = sourceIndex;

	3135 }

	3136 } else {

	3137 fromUWriteUInt8(

	3138 cnv,

	3139 buffer, len,

	3140 &target, (const char *)targetLimit,

	3141 &offsets, (int32_t)(source - args->source - U16_LENGTH(sourc eChar)),

	3142 err);

	3143 if(U_FAILURE(*err)) {

	3144 break;

	3145 }

	3146 }

	3147 } /* end if(myTargetIndex<myTargetLength) */

	3148 else{

	3149 *err =U_BUFFER_OVERFLOW_ERROR;

	3150 break;

	3151 }

	3152

	3153 }/* end while(mySourceIndex<mySourceLength) */

	3154

	3155 /*

	3156 * the end of the input stream and detection of truncated input

	3157 * are handled by the framework, but for ISO-2022-CN conversion

	3158 * we need to be in ASCII mode at the very end

	3159 *

	3160 * conditions:

	3161 * successful

	3162 * not in ASCII mode

	3163 * end of input and no truncated input

	3164 */

	3165 if( U_SUCCESS(*err) &&

	3166 pFromU2022State->g!=0 &&

	3167 args->flush && source>=sourceLimit && cnv->fromUChar32==0

	3168 ) {

	3169 int32_t sourceIndex;

	3170

	3171 /* we are switching to ASCII */

	3172 pFromU2022State->g=0;

	3173

	3174 /* get the source index of the last input character */

	3175 /*

	3176 * TODO this would be simpler and more reliable if we used a pair

	3177 * of sourceIndex/prevSourceIndex like in ucnvmbcs.c

	3178 * so that we could simply use the prevSourceIndex here;

	3179 * this code gives an incorrect result for the rare case of an unmatched

	3180 * trail surrogate that is alone in the last buffer of the text stream

	3181 */

	3182 sourceIndex=(int32_t)(source-args->source);

	3183 if(sourceIndex>0) {

	3184 --sourceIndex;

	3185 if( U16_IS_TRAIL(args->source[sourceIndex]) &&

	3186 (sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1]))

	3187 ) {

	3188 --sourceIndex;

	3189 }

	3190 } else {

	3191 sourceIndex=-1;

	3192 }

	3193

	3194 fromUWriteUInt8(

	3195 cnv,

	3196 SHIFT_IN_STR, 1,

	3197 &target, (const char *)targetLimit,

	3198 &offsets, sourceIndex,

	3199 err);

	3200 }

	3201

	3202 /save the state and return /

	3203 args->source = source;

	3204 args->target = (char*)target;

	3205 }

	3206

	3207

	3208 static void

	3209 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

	3210 UErrorCode* err){

	3211 char tempBuf[3];

	3212 const char mySource = (char ) args->source;

	3213 UChar *myTarget = args->target;

	3214 const char *mySourceLimit = args->sourceLimit;

	3215 uint32_t targetUniChar = 0x0000;

	3216 uint32_t mySourceChar = 0x0000;

	3217 UConverterDataISO2022* myData;

	3218 ISO2022State *pToU2022State;

	3219

	3220 myData=(UConverterDataISO2022*)(args->converter->extraInfo);

	3221 pToU2022State = &myData->toU2022State;

	3222

	3223 if(myData->key != 0) {

	3224 /* continue with a partial escape sequence */

	3225 goto escape;

	3226 } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myT arget < args->targetLimit) {

	3227 /* continue with a partial double-byte character */

	3228 mySourceChar = args->converter->toUBytes[0];

	3229 args->converter->toULength = 0;

	3230 targetUniChar = missingCharMarker;

	3231 goto getTrailByte;

	3232 }

	3233

	3234 while(mySource < mySourceLimit){

	3235

	3236 targetUniChar =missingCharMarker;

	3237

	3238 if(myTarget < args->targetLimit){

	3239

	3240 mySourceChar= (unsigned char) *mySource++;

	3241

	3242 switch(mySourceChar){

	3243 case UCNV_SI:

	3244 pToU2022State->g=0;

	3245 if (myData->isEmptySegment) {

	3246 myData->isEmptySegment = FALSE; /* we are handling it, r eset to avoid future spurious errors */

	3247 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	3248 args->converter->toUCallbackReason = UCNV_IRREGULAR;

	3249 args->converter->toUBytes[0] = mySourceChar;

	3250 args->converter->toULength = 1;

	3251 args->target = myTarget;

	3252 args->source = mySource;

	3253 return;

	3254 }

	3255 continue;

	3256

	3257 case UCNV_SO:

	3258 if(pToU2022State->cs[1] != 0) {

	3259 pToU2022State->g=1;

	3260 myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */

	3261 continue;

	3262 } else {

	3263 /* illegal to have SO before a matching designator */

	3264 myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */

	3265 break;

	3266 }

	3267

	3268 case ESC_2022:

	3269 mySource--;

	3270 escape:

	3271 {

	3272 const char * mySourceBefore = mySource;

	3273 int8_t toULengthBefore = args->converter->toULength;

	3274

	3275 changeState_2022(args->converter,&(mySource),

	3276 mySourceLimit, ISO_2022_CN,err);

	3277

	3278 /* After SO there must be at least one character before a de signator (designator error handled separately) */

	3279 if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegme nt) {

	3280 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

	3281 args->converter->toUCallbackReason = UCNV_IRREGULAR;

	3282 args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));

	3283 }

	3284 }

	3285

	3286 /* invalid or illegal escape sequence */

	3287 if(U_FAILURE(*err)){

	3288 args->target = myTarget;

	3289 args->source = mySource;

	3290 myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */

	3291 return;

	3292 }

	3293 continue;

	3294

	3295 /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */

	3296

	3297 case CR:

	3298 /falls through/

	3299 case LF:

	3300 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));

	3301 /* falls through */

	3302 default:

	3303 /* convert one or two bytes */

	3304 myData->isEmptySegment = FALSE;

	3305 if(pToU2022State->g != 0) {

	3306 if(mySource < mySourceLimit) {

	3307 UConverterSharedData *cnv;

	3308 StateEnum tempState;

	3309 int32_t tempBufLen;

	3310 int leadIsOk, trailIsOk;

	3311 uint8_t trailByte;

	3312 getTrailByte:

	3313 trailByte = (uint8_t)*mySource;

	3314 /*

	3315 * Ticket 5691: consistent illegal sequences:

	3316 * - We include at least the first byte in the illegal s equence.

	3317 * - If any of the non-initial bytes could be the start of a character,

	3318 * we stop the illegal sequence before the first one o f those.

	3319 *

	3320 * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is

	3321 * an ESC/SO/SI, we report only the first byte as the il legal sequence.

	3322 * Otherwise we convert or report the pair of bytes.

	3323 */

	3324 leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x2 1);

	3325 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ;

	3326 if (leadIsOk && trailIsOk) {

	3327 ++mySource;

	3328 tempState = (StateEnum)pToU2022State->cs[pToU2022Sta te->g];

	3329 if(tempState >= CNS_11643_0) {

	3330 cnv = myData->myConverterArray[CNS_11643];

	3331 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0 ));

	3332 tempBuf[1] = (char) (mySourceChar);

	3333 tempBuf[2] = (char) trailByte;

	3334 tempBufLen = 3;

	3335

	3336 }else{

	3337 cnv = myData->myConverterArray[tempState];

	3338 tempBuf[0] = (char) (mySourceChar);

	3339 tempBuf[1] = (char) trailByte;

	3340 tempBufLen = 2;

	3341 }

	3342 targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tem pBuf, tempBufLen, FALSE);

	3343 mySourceChar = (mySourceChar << 8) \| trailByte;

	3344 } else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {

	3345 /* report a pair of illegal bytes if the second byte is not a DBCS starter */

	3346 ++mySource;

	3347 /* add another bit so that the code below writes 2 b ytes in case of error */

	3348 mySourceChar = 0x10000 \| (mySourceChar << 8) \| trail Byte;

	3349 }

	3350 if(pToU2022State->g>=2) {

	3351 /* return from a single-shift state to the previous one */

	3352 pToU2022State->g=pToU2022State->prevG;

	3353 }

	3354 } else {

	3355 args->converter->toUBytes[0] = (uint8_t)mySourceChar;

	3356 args->converter->toULength = 1;

	3357 goto endloop;

	3358 }

	3359 }

	3360 else{

	3361 if(mySourceChar <= 0x7f) {

	3362 targetUniChar = (UChar) mySourceChar;

	3363 }

	3364 }

	3365 break;

	3366 }

	3367 if(targetUniChar < (missingCharMarker-1/0xfffe/)){

	3368 if(args->offsets){

	3369 args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

	3370 }

	3371 *(myTarget++)=(UChar)targetUniChar;

	3372 }

	3373 else if(targetUniChar > missingCharMarker){

	3374 /* disassemble the surrogate pair and write to output*/

	3375 targetUniChar-=0x0010000;

	3376 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));

	3377 if(args->offsets){

	3378 args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

	3379 }

	3380 ++myTarget;

	3381 if(myTarget< args->targetLimit){

	3382 *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

	3383 if(args->offsets){

	3384 args->offsets[myTarget - args->target] = (int32_t)(mySou rce - args->source - (mySourceChar <= 0xff ? 1 : 2));

	3385 }

	3386 ++myTarget;

	3387 }else{

	3388 args->converter->UCharErrorBuffer[args->converter->UCharErro rBufferLength++]=

	3389 (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)) ;

	3390 }

	3391

	3392 }

	3393 else{

	3394 /* Call the callback function*/

	3395 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err );

	3396 break;

	3397 }

	3398 }

	3399 else{

	3400 *err =U_BUFFER_OVERFLOW_ERROR;

	3401 break;

	3402 }

	3403 }

	3404 endloop:

	3405 args->target = myTarget;

	3406 args->source = mySource;

	3407 }

	3408

	3409 static void

	3410 _ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorC ode err) {

	3411 UConverter *cnv = args->converter;

	3412 UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->extraI nfo;

	3413 ISO2022State *pFromU2022State=&myConverterData->fromU2022State;

	3414 char p, subchar;

	3415 char buffer[8];

	3416 int32_t length;

	3417

	3418 subchar=(char *)cnv->subChars;

	3419 length=cnv->subCharLen; /* assume length==1 for most variants */

	3420

	3421 p = buffer;

	3422 switch(myConverterData->locale[0]){

	3423 case 'j':

	3424 {

	3425 int8_t cs;

	3426

	3427 if(pFromU2022State->g == 1) {

	3428 /* JIS7: switch from G1 to G0 */

	3429 pFromU2022State->g = 0;

	3430 *p++ = UCNV_SI;

	3431 }

	3432

	3433 cs = pFromU2022State->cs[0];

	3434 if(cs != ASCII && cs != JISX201) {

	3435 /* not in ASCII or JIS X 0201: switch to ASCII */

	3436 pFromU2022State->cs[0] = (int8_t)ASCII;

	3437 *p++ = '\x1b';

	3438 *p++ = '\x28';

	3439 *p++ = '\x42';

	3440 }

	3441

	3442 *p++ = subchar[0];

	3443 break;

	3444 }

	3445 case 'c':

	3446 if(pFromU2022State->g != 0) {

	3447 /* not in ASCII mode: switch to ASCII */

	3448 pFromU2022State->g = 0;

	3449 *p++ = UCNV_SI;

	3450 }

	3451 *p++ = subchar[0];

	3452 break;

	3453 case 'k':

	3454 if(myConverterData->version == 0) {

	3455 if(length == 1) {

	3456 if((UBool)args->converter->fromUnicodeStatus) {

	3457 /* in DBCS mode: switch to SBCS */

	3458 args->converter->fromUnicodeStatus = 0;

	3459 *p++ = UCNV_SI;

	3460 }

	3461 *p++ = subchar[0];

	3462 } else /* length == 2*/ {

	3463 if(!(UBool)args->converter->fromUnicodeStatus) {

	3464 /* in SBCS mode: switch to DBCS */

	3465 args->converter->fromUnicodeStatus = 1;

	3466 *p++ = UCNV_SO;

	3467 }

	3468 *p++ = subchar[0];

	3469 *p++ = subchar[1];

	3470 }

	3471 break;

	3472 } else {

	3473 /* save the subconverter's substitution string */

	3474 uint8_t *currentSubChars = myConverterData->currentConverter->subCha rs;

	3475 int8_t currentSubCharLen = myConverterData->currentConverter->subCha rLen;

	3476

	3477 /* set our substitution string into the subconverter */

	3478 myConverterData->currentConverter->subChars = (uint8_t *)subchar;

	3479 myConverterData->currentConverter->subCharLen = (int8_t)length;

	3480

	3481 /* let the subconverter write the subchar, set/retrieve fromUChar32 state */

	3482 args->converter = myConverterData->currentConverter;

	3483 myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;

	3484 ucnv_cbFromUWriteSub(args, 0, err);

	3485 cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;

	3486 args->converter = cnv;

	3487

	3488 /* restore the subconverter's substitution string */

	3489 myConverterData->currentConverter->subChars = currentSubChars;

	3490 myConverterData->currentConverter->subCharLen = currentSubCharLen;

	3491

	3492 if(*err == U_BUFFER_OVERFLOW_ERROR) {

	3493 if(myConverterData->currentConverter->charErrorBufferLength > 0) {

	3494 uprv_memcpy(

	3495 cnv->charErrorBuffer,

	3496 myConverterData->currentConverter->charErrorBuffer,

	3497 myConverterData->currentConverter->charErrorBufferLength );

	3498 }

	3499 cnv->charErrorBufferLength = myConverterData->currentConverter-> charErrorBufferLength;

	3500 myConverterData->currentConverter->charErrorBufferLength = 0;

	3501 }

	3502 return;

	3503 }

	3504 default:

	3505 /* not expected */

	3506 break;

	3507 }

	3508 ucnv_cbFromUWriteBytes(args,

	3509 buffer, (int32_t)(p - buffer),

	3510 offsetIndex, err);

	3511 }

	3512

	3513 /*

	3514 * Structure for cloning an ISO 2022 converter into a single memory block.

	3515 * ucnv_safeClone() of the converter will align the entire cloneStruct,

	3516 * and then ucnv_safeClone() of the sub-converter may additionally align

	3517 * currentConverter inside the cloneStruct, for which we need the deadSpace

	3518 * after currentConverter.

	3519 * This is because UAlignedMemory may be larger than the actually

	3520 * necessary alignment size for the platform.

	3521 * The other cloneStruct fields will not be moved around,

	3522 * and are aligned properly with cloneStruct's alignment.

	3523 */

	3524 struct cloneStruct

	3525 {

	3526 UConverter cnv;

	3527 UConverter currentConverter;

	3528 UAlignedMemory deadSpace;

	3529 UConverterDataISO2022 mydata;

	3530 };

	3531

	3532

	3533 static UConverter *

	3534 _ISO_2022_SafeClone(

	3535 const UConverter *cnv,

	3536 void *stackBuffer,

	3537 int32_t *pBufferSize,

	3538 UErrorCode *status)

	3539 {

	3540 struct cloneStruct * localClone;

	3541 UConverterDataISO2022 *cnvData;

	3542 int32_t i, size;

	3543

	3544 if (pBufferSize == 0) { / 'preflighting' request - set needed size into p BufferSize /

	3545 *pBufferSize = (int32_t)sizeof(struct cloneStruct);

	3546 return NULL;

	3547 }

	3548

	3549 cnvData = (UConverterDataISO2022 *)cnv->extraInfo;

	3550 localClone = (struct cloneStruct *)stackBuffer;

	3551

	3552 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */

	3553

	3554 uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));

	3555 localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra dat a */

	3556 localClone->cnv.isExtraLocal = TRUE;

	3557

	3558 /* share the subconverters */

	3559

	3560 if(cnvData->currentConverter != NULL) {

	3561 size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* includ e size of padding */

	3562 localClone->mydata.currentConverter =

	3563 ucnv_safeClone(cnvData->currentConverter,

	3564 &localClone->currentConverter,

	3565 &size, status);

	3566 if(U_FAILURE(*status)) {

	3567 return NULL;

	3568 }

	3569 }

	3570

	3571 for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {

	3572 if(cnvData->myConverterArray[i] != NULL) {

	3573 ucnv_incrementRefCount(cnvData->myConverterArray[i]);

	3574 }

	3575 }

	3576

	3577 return &localClone->cnv;

	3578 }

	3579

	3580 static void

	3581 _ISO_2022_GetUnicodeSet(const UConverter *cnv,

	3582 const USetAdder *sa,

	3583 UConverterUnicodeSet which,

	3584 UErrorCode *pErrorCode)

	3585 {

	3586 int32_t i;

	3587 UConverterDataISO2022* cnvData;

	3588

	3589 if (U_FAILURE(*pErrorCode)) {

	3590 return;

	3591 }

	3592 #ifdef U_ENABLE_GENERIC_ISO_2022

	3593 if (cnv->sharedData == &_ISO2022Data) {

	3594 /* We use UTF-8 in this case */

	3595 sa->addRange(sa->set, 0, 0xd7FF);

	3596 sa->addRange(sa->set, 0xE000, 0x10FFFF);

	3597 return;

	3598 }

	3599 #endif

	3600

	3601 cnvData = (UConverterDataISO2022*)cnv->extraInfo;

	3602

	3603 /* open a set and initialize it with code points that are algorithmically ro und-tripped */

	3604 switch(cnvData->locale[0]){

	3605 case 'j':

	3606 /* include JIS X 0201 which is hardcoded */

	3607 sa->add(sa->set, 0xa5);

	3608 sa->add(sa->set, 0x203e);

	3609 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {

	3610 /* include Latin-1 for some variants of JP */

	3611 sa->addRange(sa->set, 0, 0xff);

	3612 } else {

	3613 /* include ASCII for JP */

	3614 sa->addRange(sa->set, 0, 0x7f);

	3615 }

	3616 if(cnvData->version==3 \|\| cnvData->version==4 \|\| which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) {

	3617 /*

	3618 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0

	3619 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)

	3620 * use half-width Katakana.

	3621 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)

	3622 * half-width Katakana via the ESC ( I sequence.

	3623 * However, we only emit (fromUnicode) half-width Katakana according to the

	3624 * definition of each variant.

	3625 *

	3626 * When including fallbacks,

	3627 * we need to include half-width Katakana Unicode code points for al l JP variants because

	3628 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi dth Katakana).

	3629 */

	3630 /* include half-width Katakana for JP */

	3631 sa->addRange(sa->set, HWKANA_START, HWKANA_END);

	3632 }

	3633 break;

	3634 case 'c':

	3635 case 'z':

	3636 /* include ASCII for CN */

	3637 sa->addRange(sa->set, 0, 0x7f);

	3638 break;

	3639 case 'k':

	3640 /* there is only one converter for KR, and it is not in the myConverterA rray[] */

	3641 cnvData->currentConverter->sharedData->impl->getUnicodeSet(

	3642 cnvData->currentConverter, sa, which, pErrorCode);

	3643 /* the loop over myConverterArray[] will simply not find another convert er */

	3644 break;

	3645 default:

	3646 break;

	3647 }

	3648

	3649 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem ent ucnv_getUnicodeSet() with reverse fallbacks. */

	3650 if( (cnvData->locale[0]=='c' \|\| cnvData->locale[0]=='z') &&

	3651 cnvData->version==0 && i==CNS_11643

	3652 ) {

	3653 /* special handling for non-EXT ISO-2022-CN: add only code point s for CNS planes 1 and 2 */

	3654 ucnv_MBCSGetUnicodeSetForBytes(

	3655 cnvData->myConverterArray[i],

	3656 sa, UCNV_ROUNDTRIP_SET,

	3657 0, 0x81, 0x82,

	3658 pErrorCode);

	3659 }

	3660 #endif

	3661

	3662 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {

	3663 UConverterSetFilter filter;

	3664 if(cnvData->myConverterArray[i]!=NULL) {

	3665 if( (cnvData->locale[0]=='c' \|\| cnvData->locale[0]=='z') &&

	3666 cnvData->version==0 && i==CNS_11643

	3667 ) {

	3668 /*

	3669 * Version-specific for CN:

	3670 * CN version 0 does not map CNS planes 3..7 although

	3671 * they are all available in the CNS conversion table;

	3672 * CN version 1 (-EXT) does map them all.

	3673 * The two versions create different Unicode sets.

	3674 */

	3675 filter=UCNV_SET_FILTER_2022_CN;

	3676 } else if(cnvData->locale[0]=='j' && i==JISX208) {

	3677 /*

	3678 * Only add code points that map to Shift-JIS codes

	3679 * corresponding to JIS X 0208.

	3680 */

	3681 filter=UCNV_SET_FILTER_SJIS;

	3682 } else if(i==KSC5601) {

	3683 /*

	3684 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)

	3685 * are broader than GR94.

	3686 */

	3687 filter=UCNV_SET_FILTER_GR94DBCS;

	3688 } else {

	3689 filter=UCNV_SET_FILTER_NONE;

	3690 }

	3691 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode);

	3692 }

	3693 }

	3694

	3695 /*

	3696 * ISO 2022 converters must not convert SO/SI/ESC despite what

	3697 * sub-converters do by themselves.

	3698 * Remove these characters from the set.

	3699 */

	3700 sa->remove(sa->set, 0x0e);

	3701 sa->remove(sa->set, 0x0f);

	3702 sa->remove(sa->set, 0x1b);

	3703

	3704 /* ISO 2022 converters do not convert C1 controls either */

	3705 sa->removeRange(sa->set, 0x80, 0x9f);

	3706 }

	3707

	3708 static const UConverterImpl _ISO2022Impl={

	3709 UCNV_ISO_2022,

	3710

	3711 NULL,

	3712 NULL,

	3713

	3714 _ISO2022Open,

	3715 _ISO2022Close,

	3716 _ISO2022Reset,

	3717

	3718 #ifdef U_ENABLE_GENERIC_ISO_2022

	3719 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,

	3720 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,

	3721 ucnv_fromUnicode_UTF8,

	3722 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,

	3723 #else

	3724 NULL,

	3725 NULL,

	3726 NULL,

	3727 NULL,

	3728 #endif

	3729 NULL,

	3730

	3731 NULL,

	3732 _ISO2022getName,

	3733 _ISO_2022_WriteSub,

	3734 _ISO_2022_SafeClone,

	3735 _ISO_2022_GetUnicodeSet

	3736 };

	3737 static const UConverterStaticData _ISO2022StaticData={

	3738 sizeof(UConverterStaticData),

	3739 "ISO_2022",

	3740 2022,

	3741 UCNV_IBM,

	3742 UCNV_ISO_2022,

	3743 1,

	3744 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */

	3745 { 0x1a, 0, 0, 0 },

	3746 1,

	3747 FALSE,

	3748 FALSE,

	3749 0,

	3750 0,

	3751 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

	3752 };

	3753 const UConverterSharedData _ISO2022Data={

	3754 sizeof(UConverterSharedData),

	3755 ~((uint32_t) 0),

	3756 NULL,

	3757 NULL,

	3758 &_ISO2022StaticData,

	3759 FALSE,

	3760 &_ISO2022Impl,

	3761 0

	3762 };

	3763

	3764 /***********JP**************/

	3765 static const UConverterImpl _ISO2022JPImpl={

	3766 UCNV_ISO_2022,

	3767

	3768 NULL,

	3769 NULL,

	3770

	3771 _ISO2022Open,

	3772 _ISO2022Close,

	3773 _ISO2022Reset,

	3774

	3775 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,

	3776 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,

	3777 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,

	3778 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,

	3779 NULL,

	3780

	3781 NULL,

	3782 _ISO2022getName,

	3783 _ISO_2022_WriteSub,

	3784 _ISO_2022_SafeClone,

	3785 _ISO_2022_GetUnicodeSet

	3786 };

	3787 static const UConverterStaticData _ISO2022JPStaticData={

	3788 sizeof(UConverterStaticData),

	3789 "ISO_2022_JP",

	3790 0,

	3791 UCNV_IBM,

	3792 UCNV_ISO_2022,

	3793 1,

	3794 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */

	3795 { 0x1a, 0, 0, 0 },

	3796 1,

	3797 FALSE,

	3798 FALSE,

	3799 0,

	3800 0,

	3801 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

	3802 };

	3803 static const UConverterSharedData _ISO2022JPData={

	3804 sizeof(UConverterSharedData),

	3805 ~((uint32_t) 0),

	3806 NULL,

	3807 NULL,

	3808 &_ISO2022JPStaticData,

	3809 FALSE,

	3810 &_ISO2022JPImpl,

	3811 0

	3812 };

	3813

	3814 /*********** KR *************/

	3815 static const UConverterImpl _ISO2022KRImpl={

	3816 UCNV_ISO_2022,

	3817

	3818 NULL,

	3819 NULL,

	3820

	3821 _ISO2022Open,

	3822 _ISO2022Close,

	3823 _ISO2022Reset,

	3824

	3825 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,

	3826 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,

	3827 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,

	3828 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,

	3829 NULL,

	3830

	3831 NULL,

	3832 _ISO2022getName,

	3833 _ISO_2022_WriteSub,

	3834 _ISO_2022_SafeClone,

	3835 _ISO_2022_GetUnicodeSet

	3836 };

	3837 static const UConverterStaticData _ISO2022KRStaticData={

	3838 sizeof(UConverterStaticData),

	3839 "ISO_2022_KR",

	3840 0,

	3841 UCNV_IBM,

	3842 UCNV_ISO_2022,

	3843 1,

	3844 3, /* max 3 bytes per UChar: SO+DBCS */

	3845 { 0x1a, 0, 0, 0 },

	3846 1,

	3847 FALSE,

	3848 FALSE,

	3849 0,

	3850 0,

	3851 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

	3852 };

	3853 static const UConverterSharedData _ISO2022KRData={

	3854 sizeof(UConverterSharedData),

	3855 ~((uint32_t) 0),

	3856 NULL,

	3857 NULL,

	3858 &_ISO2022KRStaticData,

	3859 FALSE,

	3860 &_ISO2022KRImpl,

	3861 0

	3862 };

	3863

	3864 /************* CN *************/

	3865 static const UConverterImpl _ISO2022CNImpl={

	3866

	3867 UCNV_ISO_2022,

	3868

	3869 NULL,

	3870 NULL,

	3871

	3872 _ISO2022Open,

	3873 _ISO2022Close,

	3874 _ISO2022Reset,

	3875

	3876 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,

	3877 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,

	3878 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,

	3879 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,

	3880 NULL,

	3881

	3882 NULL,

	3883 _ISO2022getName,

	3884 _ISO_2022_WriteSub,

	3885 _ISO_2022_SafeClone,

	3886 _ISO_2022_GetUnicodeSet

	3887 };

	3888 static const UConverterStaticData _ISO2022CNStaticData={

	3889 sizeof(UConverterStaticData),

	3890 "ISO_2022_CN",

	3891 0,

	3892 UCNV_IBM,

	3893 UCNV_ISO_2022,

	3894 1,

	3895 8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + D BCS */

	3896 { 0x1a, 0, 0, 0 },

	3897 1,

	3898 FALSE,

	3899 FALSE,

	3900 0,

	3901 0,

	3902 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

	3903 };

	3904 static const UConverterSharedData _ISO2022CNData={

	3905 sizeof(UConverterSharedData),

	3906 ~((uint32_t) 0),

	3907 NULL,

	3908 NULL,

	3909 &_ISO2022CNStaticData,

	3910 FALSE,

	3911 &_ISO2022CNImpl,

	3912 0

	3913 };

	3914

	3915

	3916

	3917 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

OLD	NEW

« no previous file with comments | « icu46/source/common/ucnv.c ('k') | icu46/source/common/ucnv_bld.h » ('j') | no next file with comments »