icu46/source/common/ucnv2022.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/common/ucnv2022.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/common/ucnv2022.c

===================================================================

--- icu46/source/common/ucnv2022.c (revision 0)

+++ icu46/source/common/ucnv2022.c (revision 0)

@@ -0,0 +1,3917 @@

+/*

+**********************************************************************

+* file name: ucnv2022.c

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:4

+* created on: 2000feb03

+* created by: Markus W. Scherer

+* Change history:

+* 06/29/2000 helena Major rewrite of the callback APIs.

+* 08/08/2000 Ram Included support for ISO-2022-JP-2

+* Changed implementation of toUnicode

+* function

+* 08/21/2000 Ram Added support for ISO-2022-KR

+* 08/29/2000 Ram Separated implementation of EBCDIC to

+* ucnvebdc.c

+* 09/20/2000 Ram Added support for ISO-2022-CN

+* Added implementations for getNextUChar()

+* for specific 2022 country variants.

+* 10/31/2000 Ram Implemented offsets logic functions

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

+#include "unicode/ucnv.h"

+#include "unicode/uset.h"

+#include "unicode/ucnv_err.h"

+#include "unicode/ucnv_cb.h"

+#include "ucnv_imp.h"

+#include "ucnv_bld.h"

+#include "ucnv_cnv.h"

+#include "ucnvmbcs.h"

+#include "cstring.h"

+#include "cmemory.h"

+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

+#ifdef U_ENABLE_GENERIC_ISO_2022

+/*

+ * I am disabling the generic ISO-2022 converter after proposing to do so on

+ * the icu mailing list two days ago.

+ *

+ * Reasons:

+ * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of

+ * its designation sequences, single shifts with return to the previous state,

+ * switch-with-no-return to UTF-16BE or similar, etc.

+ * This is unlike the language-specific variants like ISO-2022-JP which

+ * require a much smaller repertoire of ISO-2022 features.

+ * These variants continue to be supported.

+ * 2. I believe that no one is really using the generic ISO-2022 converter

+ * but rather always one of the language-specific variants.

+ * Note that ICU's generic ISO-2022 converter has always output one escape

+ * sequence followed by UTF-8 for the whole stream.

+ * 3. Switching between subcharsets is extremely slow, because each time

+ * the previous converter is closed and a new one opened,

+ * without any kind of caching, least-recently-used list, etc.

+ * 4. The code is currently buggy, and given the above it does not seem

+ * reasonable to spend the time on maintenance.

+ * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.

+ * This means, for example, that when ISO-8859-7 is designated, the following

+ * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.

+ * The ICU ISO-2022 converter does not handle this - and has no information

+ * about which subconverter would have to be shifted vs. which is designed

+ * for 7-bit ISO-2022.

+ *

+ * Markus Scherer 2003-dec-03

+ */

+#endif

+static const char SHIFT_IN_STR[] = "\x0F";

+static const char SHIFT_OUT_STR[] = "\x0E";

+#define CR 0x0D

+#define LF 0x0A

+#define H_TAB 0x09

+#define V_TAB 0x0B

+#define SPACE 0x20

+enum {

+ HWKANA_START=0xff61,

+ HWKANA_END=0xff9f

+};

+/*

+ * 94-character sets with native byte values A1..FE are encoded in ISO 2022

+ * as bytes 21..7E. (Subtract 0x80.)

+ * 96-character sets with native byte values A0..FF are encoded in ISO 2022

+ * as bytes 20..7F. (Subtract 0x80.)

+ * Do not encode C1 control codes with native bytes 80..9F

+ * as bytes 00..1F (C0 control codes).

+ */

+enum {

+ GR94_START=0xa1,

+ GR94_END=0xfe,

+ GR96_START=0xa0,

+ GR96_END=0xff

+};

+/*

+ * ISO 2022 control codes must not be converted from Unicode

+ * because they would mess up the byte stream.

+ * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b

+ * corresponding to SO, SI, and ESC.

+ */

+#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)

+/* for ISO-2022-JP and -CN implementations */

+typedef enum {

+ /* shared values */

+ INVALID_STATE=-1,

+ ASCII = 0,

+ SS2_STATE=0x10,

+ SS3_STATE,

+ /* JP */

+ ISO8859_1 = 1 ,

+ ISO8859_7 = 2 ,

+ JISX201 = 3,

+ JISX208 = 4,

+ JISX212 = 5,

+ GB2312 =6,

+ KSC5601 =7,

+ HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */

+ /* CN */

+ /* the first few enum constants must keep their values because they correspond to myConverterArray[] */

+ GB2312_1=1,

+ ISO_IR_165=2,

+ CNS_11643=3,

+ /*

+ * these are used in StateEnum and ISO2022State variables,

+ * but CNS_11643 must be used to index into myConverterArray[]

+ */

+ CNS_11643_0=0x20,

+ CNS_11643_1,

+ CNS_11643_2,

+ CNS_11643_3,

+ CNS_11643_4,

+ CNS_11643_5,

+ CNS_11643_6,

+ CNS_11643_7

+} StateEnum;

+/* is the StateEnum charset value for a DBCS charset? */

+#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)

+#define CSM(cs) ((uint16_t)1<<(cs))

+/*

+ * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence

+ * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x

+ *

+ * Note: The converter uses some leniency:

+ * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in

+ * all versions, not just JIS7 and JIS8.

+ * - ICU does not distinguish between different versions of JIS X 0208.

+ */

+enum { MAX_JA_VERSION=4 };

+static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={

+ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),

+ CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),

+};

+typedef enum {

+ ASCII1=0,

+ LATIN1,

+ SBCS,

+ DBCS,

+ MBCS,

+ HWKANA

+}Cnv2022Type;

+typedef struct ISO2022State {

+ int8_t cs[4]; /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */

+ int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */

+ int8_t prevG; /* g before single shift (SS2 or SS3) */

+} ISO2022State;

+#define UCNV_OPTIONS_VERSION_MASK 0xf

+#define UCNV_2022_MAX_CONVERTERS 10

+/*

+ * Per-converter state of the ISO-2022 variants. _ISO2022Open() allocates one

+ * instance, zero-fills it and stores it in UConverter.extraInfo.

+ */ typedef struct{

+ UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; /* shared data of the sub-charsets, indexed by StateEnum values (e.g. JISX208, GB2312_1); unused slots stay NULL */

+ UConverter *currentConverter; /* sub-converter opened with ucnv_open() (ko variant, and the generic variant when enabled); closed in _ISO2022Close() */

+ Cnv2022Type currentType; /* set to ASCII1 by _ISO2022Open() */

+ ISO2022State toU2022State, fromU2022State; /* per-direction charset/shift state; zeroed by _ISO2022Reset() */

+ uint32_t key; /* escape sequence key carried between calls of changeState_2022(); non-zero while a sequence is incomplete */

+ uint32_t version; /* pArgs->options & UCNV_OPTIONS_VERSION_MASK, adjusted per locale in _ISO2022Open() */

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ UBool isFirstBuffer; /* set to TRUE on open and on reset of the generic variant */

+#endif

+ UBool isEmptySegment; /* cleared by _ISO2022Reset(); NOTE(review): the code that sets it is not visible in this part of the file */

+ char name[30]; /* name returned by _ISO2022getName(), e.g. "ISO_2022,locale=ja,version=0" */

+ char locale[3]; /* "ja", "ko" or "cn"; left empty (all zero) for the generic variant */

+}UConverterDataISO2022;

+/* Protos */

+/* ISO-2022 ----------------------------------------------------------------- */

+/*Forward declaration */

+U_CFUNC void

+ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,

+ UErrorCode * err);

+U_CFUNC void

+ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,

+ UErrorCode * err);

+#define ESC_2022 0x1B /*ESC*/

+typedef enum

+ INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/

+ VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/

+ VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/

+ VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/

+} UCNV_TableStates_2022;

+/*

+* The way these state transition arrays work is:

+* ex : ESC$B is the sequence for JISX208

+* a) First Iteration: char is ESC

+* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index

+* int x = normalize_esq_chars_2022[27] which is equal to 1

+* ii) Search for this value in escSeqStateTable_Key_2022[]

+* value of x is stored at escSeqStateTable_Key_2022[0]

+* iii) Save this index as offset

+* iv) Get state of this sequence from escSeqStateTable_Value_2022[]

+* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022

+* b) Switch on this state and continue to next char

+* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index

+* which is normalize_esq_chars_2022[36] == 4

+* ii) x is currently 1(from above)

+* x<<=5 -- x is now 32

+* x+=normalize_esq_chars_2022[36]

+* now x is 36

+* iii) Search for this value in escSeqStateTable_Key_2022[]

+* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2

+* iv) Get state of this sequence from escSeqStateTable_Value_2022[]

+* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022

+* c) Switch on this state and continue to next char

+* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index

+* ii) x is currently 36 (from above)

+* x<<=5 -- x is now 1152

+* x+=normalize_esq_chars_2022[66]

+* now x is 1161

+* iii) Search for this value in escSeqStateTable_Key_2022[]

+* value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24

+* iv) Get state of this sequence from escSeqStateTable_Value_2022[24]

+* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022

+* v) Get the converter name from escSeqStateTable_Result_2022[24] which is "JISX-208"

+*/

+/*Below are the 3 arrays depicting a state transition table*/

+static const int8_t normalize_esq_chars_2022[256] = {

+/* 0 1 2 3 4 5 6 7 8 9 */

+ 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0

+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0

+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0

+ ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0

+ ,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0

+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0

+ ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12

+ ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28

+ ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0

+ ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0

+ ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0

+ ,0 ,0 ,0 ,0 ,0 ,0

+};

+#ifdef U_ENABLE_GENERIC_ISO_2022

+/*

+ * When the generic ISO-2022 converter is completely removed, not just disabled

+ * per #ifdef, then the following state table and the associated tables that are

+ * dimensioned with MAX_STATES_2022 should be trimmed.

+ *

+ * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of

+ * the associated escape sequences starting with ESC ( B should be removed.

+ * This includes the ones with key values 1097 and all of the ones above 1000000.

+ *

+ * For the latter, the tables can simply be truncated.

+ * For the former, since the tables must be kept parallel, it is probably best

+ * to simply duplicate an adjacent table cell, parallel in all tables.

+ *

+ * It may make sense to restructure the tables, especially by using small search

+ * tables for the variants instead of indexing them parallel to the table here.

+ */

+#endif

+#define MAX_STATES_2022 74

+static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {

+/* 0 1 2 3 4 5 6 7 8 9 */

+ 1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096

+ ,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106

+ ,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257

+ ,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940

+ ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644

+ ,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138

+ ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630

+ ,35947631 ,35947635 ,35947636 ,35947638

+};

+#ifdef U_ENABLE_GENERIC_ISO_2022

+static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {

+ /* 0 1 2 3 4 5 6 7 8 9 */

+ NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1"

+ ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1"

+ ,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8"

+ ,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383"

+ ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165"

+ ,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"

+ ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089"

+ ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"

+};

+#endif

+static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {

+/* 0 1 2 3 4 5 6 7 8 9 */

+ VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

+ ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

+ ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022

+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

+ ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

+};

+/* Type def for refactoring changeState_2022 code*/

+typedef enum{

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ ISO_2022=0,

+#endif

+ ISO_2022_JP=1,

+ ISO_2022_KR=2,

+ ISO_2022_CN=3

+} Variant2022;

+/*********** ISO 2022 Converter Protos ***********/

+static void

+_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);

+static void

+ _ISO2022Close(UConverter *converter);

+static void

+_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);

+static const char*

+_ISO2022getName(const UConverter* cnv);

+static void

+_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);

+static UConverter *

+_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);

+#ifdef U_ENABLE_GENERIC_ISO_2022

+static void

+T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);

+#endif

+/*const UConverterSharedData _ISO2022Data;*/

+static const UConverterSharedData _ISO2022JPData;

+static const UConverterSharedData _ISO2022KRData;

+static const UConverterSharedData _ISO2022CNData;

+/*************** Converter implementations ******************/

+/*

+ * Thin wrapper around ucnv_fromUWriteBytes() for callers whose output cursor

+ * is a uint8_t * instead of a char *.

+ * The purpose of this function is to get around gcc compiler warnings:

+ * it copies *target into a local char *, passes the address of that local

+ * together with all other arguments unchanged, and afterwards stores the

+ * local pointer back into *target.

+ */

+static U_INLINE void

+fromUWriteUInt8(UConverter *cnv,

+ const char *bytes, int32_t length,

+ uint8_t **target, const char *targetLimit,

+ int32_t **offsets,

+ int32_t sourceIndex,

+ UErrorCode *pErrorCode)

+ char *targetChars = (char *)*target;

+ ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,

+ offsets, sourceIndex, pErrorCode);

+ *target = (uint8_t*)targetChars; /* hand the pointer written by ucnv_fromUWriteBytes() back to the caller */

+/*

+ * Resets the to-Unicode state used by the ISO-2022-KR variant.

+ * Only version 1 keeps such state: in that case toUnicodeStatus, mode and

+ * toULength of the sub-converter myConverterData->currentConverter are set

+ * to 0. For any other version this function does nothing.

+ * The converter parameter is not used in the body.

+ */ static U_INLINE void

+setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){

+ if(myConverterData->version == 1) {

+ UConverter *cnv = myConverterData->currentConverter;

+ cnv->toUnicodeStatus=0; /* offset */

+ cnv->mode=0; /* state */

+ cnv->toULength=0; /* byteIndex */

+ }

+/*

+ * Resets the from-Unicode state used by the ISO-2022-KR variant.

+ * If converter->charErrorBuffer is currently empty, the four bytes

+ * 1B 24 29 43 (ESC $ ) C) are stored in it.

+ * For version 1, fromUChar32 and fromUnicodeStatus of the sub-converter

+ * myConverterData->currentConverter are reset as well.

+ */ static U_INLINE void

+setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){

+ /* in ISO-2022-KR the designator sequence appears only once

+ * in a file so we append it only once

+ */

+ if( converter->charErrorBufferLength==0){

+ converter->charErrorBufferLength = 4;

+ converter->charErrorBuffer[0] = 0x1b; /* ESC */

+ converter->charErrorBuffer[1] = 0x24; /* '$' */

+ converter->charErrorBuffer[2] = 0x29; /* ')' */

+ converter->charErrorBuffer[3] = 0x43; /* 'C' */

+ }

+ if(myConverterData->version == 1) {

+ UConverter *cnv = myConverterData->currentConverter;

+ cnv->fromUChar32=0;

+ cnv->fromUnicodeStatus=1; /* prevLength */

+ }

+/*

+ * Opens an ISO-2022 converter: allocates the UConverterDataISO2022 state,

+ * stores it in cnv->extraInfo and configures one language variant.

+ *

+ * cnv       the converter being opened; extraInfo, sharedData,

+ *           maxBytesPerUChar and some state fields are written here

+ * pArgs     load arguments; locale, options and onlyTestIsLoadable are read

+ * errorCode receives U_MEMORY_ALLOCATION_ERROR if the allocation fails,

+ *           U_UNSUPPORTED_ERROR for an unrecognized locale when the generic

+ *           converter is compiled out, or whatever the loading functions set

+ *

+ * The version is pArgs->options & UCNV_OPTIONS_VERSION_MASK. The variant is

+ * chosen from the first characters of pArgs->locale, which must be followed

+ * by '_' or the end of the string:

+ *  - "ja"/"jp": versions above MAX_JA_VERSION fall back to 0; the sub-charsets

+ *    enabled in jpCharsetMasks[version] are loaded into myConverterArray[].

+ *  - "ko"/"kr": version 1 uses "icu-internal-25546", every other version is

+ *    treated as 0 and uses "ibm-949"; the sub-converter is opened with

+ *    ucnv_open(). With onlyTestIsLoadable only ucnv_canCreateConverter() is

+ *    called and the state is freed again.

+ *  - "zh"/"cn": loads "ibm-5478", "cns-11643-1992" and, for version 1 only,

+ *    "iso-ir-165"; versions other than 0 and 1 are stored as 2.

+ *  - anything else: the generic converter if U_ENABLE_GENERIC_ISO_2022 is

+ *    defined, otherwise U_UNSUPPORTED_ERROR.

+ *

+ * At the end, _ISO2022Close() is called if an error occurred or if

+ * pArgs->onlyTestIsLoadable is set.

+ */ static void

+_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

+ char myLocale[6]={' ',' ',' ',' ',' ',' '};

+ cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));

+ if(cnv->extraInfo != NULL) {

+ UConverterNamePieces stackPieces;

+ UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) };

+ UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;

+ uint32_t version;

+ stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;

+ uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));

+ myConverterData->currentType = ASCII1;

+ cnv->fromUnicodeStatus =FALSE;

+ if(pArgs->locale){

+ uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)); /* NOTE(review): may leave myLocale without a terminating NUL; only myLocale[0..2] are read below */

+ }

+ version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;

+ myConverterData->version = version;

+ if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&

+ (myLocale[2]=='_' || myLocale[2]=='\0'))

+ {

+ size_t len=0;

+ /* open the required converters and cache them */

+ if(version>MAX_JA_VERSION) {

+ /* prevent indexing beyond jpCharsetMasks[] */

+ myConverterData->version = version = 0;

+ }

+ if(jpCharsetMasks[version]&CSM(ISO8859_7)) {

+ myConverterData->myConverterArray[ISO8859_7] =

+ ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);

+ }

+ myConverterData->myConverterArray[JISX208] =

+ ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); /* loaded for every ja version, not conditional on the charset mask */

+ if(jpCharsetMasks[version]&CSM(JISX212)) {

+ myConverterData->myConverterArray[JISX212] =

+ ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);

+ }

+ if(jpCharsetMasks[version]&CSM(GB2312)) {

+ myConverterData->myConverterArray[GB2312] =

+ ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */

+ }

+ if(jpCharsetMasks[version]&CSM(KSC5601)) {

+ myConverterData->myConverterArray[KSC5601] =

+ ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);

+ }

+ /* set the function pointers to appropriate functions */

+ cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);

+ uprv_strcpy(myConverterData->locale,"ja");

+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");

+ len = uprv_strlen(myConverterData->name);

+ myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); /* append the version as one decimal digit */

+ myConverterData->name[len+1]='\0';

+ }

+ else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&

+ (myLocale[2]=='_' || myLocale[2]=='\0'))

+ {

+ const char *cnvName;

+ if(version==1) {

+ cnvName="icu-internal-25546";

+ } else {

+ cnvName="ibm-949";

+ myConverterData->version=version=0;

+ }

+ if(pArgs->onlyTestIsLoadable) {

+ ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */

+ uprv_free(cnv->extraInfo);

+ cnv->extraInfo=NULL;

+ return;

+ } else {

+ myConverterData->currentConverter=ucnv_open(cnvName, errorCode);

+ if (U_FAILURE(*errorCode)) {

+ _ISO2022Close(cnv);

+ return;

+ }

+ if(version==1) {

+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");

+ uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4); /* version 1 takes over the substitution bytes of the sub-converter */

+ cnv->subCharLen = myConverterData->currentConverter->subCharLen;

+ }else{

+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");

+ }

+ /* initialize the state variables */

+ setInitialStateToUnicodeKR(cnv, myConverterData);

+ setInitialStateFromUnicodeKR(cnv, myConverterData);

+ /* set the function pointers to appropriate functions */

+ cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;

+ uprv_strcpy(myConverterData->locale,"ko");

+ }

+ else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&

+ (myLocale[2]=='_' || myLocale[2]=='\0'))

+ {

+ /* open the required converters and cache them */

+ myConverterData->myConverterArray[GB2312_1] =

+ ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);

+ if(version==1) {

+ myConverterData->myConverterArray[ISO_IR_165] =

+ ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);

+ }

+ myConverterData->myConverterArray[CNS_11643] =

+ ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);

+ /* set the function pointers to appropriate functions */

+ cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;

+ uprv_strcpy(myConverterData->locale,"cn");

+ if (version==0){

+ myConverterData->version = 0;

+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");

+ }else if (version==1){

+ myConverterData->version = 1;

+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");

+ }else {

+ myConverterData->version = 2; /* every version other than 0 and 1 is handled as version 2 */

+ (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");

+ }

+ else{

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ myConverterData->isFirstBuffer = TRUE;

+ /* append the UTF-8 escape sequence */

+ cnv->charErrorBufferLength = 3;

+ cnv->charErrorBuffer[0] = 0x1b;

+ cnv->charErrorBuffer[1] = 0x25;

+ cnv->charErrorBuffer[2] = 0x42;

+ cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;

+ /* initialize the state variables */

+ uprv_strcpy(myConverterData->name,"ISO_2022");

+#else

+ *errorCode = U_UNSUPPORTED_ERROR;

+ return; /* NOTE(review): returns with cnv->extraInfo still allocated; presumably the caller closes the converter on failure - confirm */

+#endif

+ }

+ cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;

+ if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {

+ _ISO2022Close(cnv);

+ }

+ } else {

+ *errorCode = U_MEMORY_ALLOCATION_ERROR;

+ }

+/*

+ * Releases everything _ISO2022Open() attached to the converter.

+ * If converter->extraInfo is not NULL, every non-NULL entry of

+ * myConverterArray[] is passed to ucnv_unloadSharedDataIfReady() and

+ * currentConverter is closed with ucnv_close(). The state block itself is

+ * freed, and extraInfo set to NULL, only when converter->isExtraLocal is not

+ * set.

+ * If extraInfo is already NULL, nothing is released.

+ */ static void

+_ISO2022Close(UConverter *converter) {

+ UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);

+ UConverterSharedData **array = myData->myConverterArray; /* NOTE(review): member address is formed before the NULL check below; array is only dereferenced inside that check */

+ int32_t i;

+ if (converter->extraInfo != NULL) {

+ /*close the array of converter pointers and free the memory*/

+ for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {

+ if(array[i]!=NULL) {

+ ucnv_unloadSharedDataIfReady(array[i]);

+ }

+ ucnv_close(myData->currentConverter);

+ if(!converter->isExtraLocal){

+ uprv_free (converter->extraInfo);

+ converter->extraInfo = NULL;

+ }

+/*

+ * Resets the ISO-2022 state kept in converter->extraInfo.

+ *

+ * converter the converter whose state is reset

+ * choice    selects the direction(s): the to-Unicode state is reset when

+ *           choice<=UCNV_RESET_TO_UNICODE, the from-Unicode state when

+ *           choice!=UCNV_RESET_TO_UNICODE

+ *

+ * To-Unicode reset: zeroes toU2022State, the escape sequence key and

+ * isEmptySegment. From-Unicode reset: zeroes fromU2022State.

+ * For the ko variant (locale[0]=='k') the KR-specific initial state is

+ * restored as well. The generic variant (empty locale, only when

+ * U_ENABLE_GENERIC_ISO_2022 is defined) has its own handling.

+ */ static void

+_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {

+ UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);

+ if(choice<=UCNV_RESET_TO_UNICODE) {

+ uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));

+ myConverterData->key = 0;

+ myConverterData->isEmptySegment = FALSE;

+ }

+ if(choice!=UCNV_RESET_TO_UNICODE) {

+ uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));

+ }

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ if(myConverterData->locale[0] == 0){

+ if(choice<=UCNV_RESET_TO_UNICODE) {

+ myConverterData->isFirstBuffer = TRUE;

+ myConverterData->key = 0;

+ if (converter->mode == UCNV_SO){

+ ucnv_close (myConverterData->currentConverter);

+ myConverterData->currentConverter=NULL;

+ }

+ converter->mode = UCNV_SI;

+ }

+ if(choice!=UCNV_RESET_TO_UNICODE) {

+ /* re-append UTF-8 escape sequence

+ * NOTE(review): the bytes written here are 1B 28 42 (ESC ( B), while

+ * _ISO2022Open() writes 1B 25 42 (ESC % B) under the same description -

+ * confirm which one is intended. */

+ converter->charErrorBufferLength = 3;

+ converter->charErrorBuffer[0] = 0x1b;

+ converter->charErrorBuffer[1] = 0x28;

+ converter->charErrorBuffer[2] = 0x42;

+ }

+ else

+#endif

+ {

+ /* reset the state variables */

+ if(myConverterData->locale[0] == 'k'){

+ if(choice<=UCNV_RESET_TO_UNICODE) {

+ setInitialStateToUnicodeKR(converter, myConverterData);

+ }

+ if(choice!=UCNV_RESET_TO_UNICODE) {

+ setInitialStateFromUnicodeKR(converter, myConverterData);

+ }

+/*

+ * Returns the variant-specific converter name that _ISO2022Open() stored in

+ * the state block (for example "ISO_2022,locale=ko,version=1").

+ * The returned pointer refers to the name field inside cnv->extraInfo.

+ * Returns NULL if cnv->extraInfo is NULL.

+ */ static const char*

+_ISO2022getName(const UConverter* cnv){

+ if(cnv->extraInfo){

+ UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;

+ return myData->name;

+ }

+ return NULL;

+/*************** to unicode *******************/

+/****************************************************************************

+ * Recognized escape sequences are

+ * <ESC>(B ASCII

+ * <ESC>.A ISO-8859-1

+ * <ESC>.F ISO-8859-7

+ * <ESC>(J JISX-201

+ * <ESC>(I JISX-201

+ * <ESC>$B JISX-208

+ * <ESC>$@ JISX-208

+ * <ESC>$(D JISX-212

+ * <ESC>$A GB2312

+ * <ESC>$(C KSC5601

+ */

+static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {

+/* 0 1 2 3 4 5 6 7 8 9 */

+ INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+ ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE

+ ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+ ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE

+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+};

+/*************** to unicode *******************/

+static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {

+/* 0 1 2 3 4 5 6 7 8 9 */

+ INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165

+ ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+ ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

+};

+/*

+ * Advances the escape sequence matcher by one byte.

+ *

+ * c      the next byte of the escape sequence

+ * key    in/out: the key accumulated from the previous bytes (0 at the

+ *        start of a sequence); receives the new key on a match, 0 otherwise

+ * offset out: index of the matched key in escSeqStateTable_Key_2022[],

+ *        0 if there is no match

+ *

+ * The byte is mapped through normalize_esq_chars_2022[]; a mapped value of 0

+ * means the byte cannot occur in any escape sequence. Otherwise the candidate

+ * key (*key << 5) + mapped value is looked up with a binary search in

+ * escSeqStateTable_Key_2022[].

+ *

+ * Returns the entry of escSeqStateTable_Value_2022[] at the matched index, or

+ * INVALID_2022 if the byte or the resulting key is not in the tables.

+ */ static UCNV_TableStates_2022

+getKey_2022(char c,int32_t* key,int32_t* offset){

+ int32_t togo;

+ int32_t low = 0;

+ int32_t hi = MAX_STATES_2022;

+ int32_t oldmid=0;

+ togo = normalize_esq_chars_2022[(uint8_t)c];

+ if(togo == 0) {

+ /* not a valid character anywhere in an escape sequence */

+ *key = 0;

+ *offset = 0;

+ return INVALID_2022;

+ }

+ togo = (*key << 5) + togo;

+ while (hi != low) /*binary search*/{

+ register int32_t mid = (hi+low) >> 1; /*Finds median*/

+ if (mid == oldmid) /* low=mid does not shrink the range once hi==low+1; stop when the midpoint repeats */

+ break;

+ if (escSeqStateTable_Key_2022[mid] > togo){

+ hi = mid;

+ }

+ else if (escSeqStateTable_Key_2022[mid] < togo){

+ low = mid;

+ }

+ else /*we found it*/{

+ *key = togo;

+ *offset = mid;

+ return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];

+ }

+ oldmid = mid;

+ }

+ *key = 0;

+ *offset = 0;

+ return INVALID_2022;

+/*

+ * Runs through a state machine to determine the escape sequence - codepage

+ * correspondence and updates the to-Unicode state accordingly.

+ *

+ * _this       the ISO-2022 converter; its extraInfo holds the state

+ * source      in/out: current read position; advanced past consumed bytes,

+ *             or moved back when bytes of an illegal sequence are backed out

+ * sourceLimit end of the input

+ * var         the language variant whose escape sequences are accepted

+ * err         receives U_ILLEGAL_ESCAPE_SEQUENCE or

+ *             U_UNSUPPORTED_ESCAPE_SEQUENCE on failure

+ *

+ * Each consumed byte is appended to _this->toUBytes[] and fed to

+ * getKey_2022(), starting from the key saved in the state by an earlier call.

+ *  - If the input ends while the sequence is still incomplete, the key is

+ *    saved and the function returns without setting an error.

+ *  - A byte or key that is not in the tables sets U_ILLEGAL_ESCAPE_SEQUENCE.

+ *  - A complete sequence is interpreted per variant: it changes a designated

+ *    charset in toU2022State.cs[], performs a single shift (SS2/SS3), or sets

+ *    U_UNSUPPORTED_ESCAPE_SEQUENCE if the variant/version does not allow it.

+ *

+ * On success toULength is reset to 0. For an illegal sequence only the first

+ * byte stays in toUBytes[]; all later bytes are backed out, through preToU[]

+ * if they came from a previous buffer. For an unsupported sequence

+ * toUCallbackReason is set to UCNV_UNASSIGNED.

+ */

+static void

+changeState_2022(UConverter* _this,

+ const char** source,

+ const char* sourceLimit,

+ Variant2022 var,

+ UErrorCode* err){

+ UCNV_TableStates_2022 value;

+ UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);

+ uint32_t key = myData2022->key;

+ int32_t offset = 0;

+ int8_t initialToULength = _this->toULength; /* number of bytes already collected before this call */

+ char c;

+ value = VALID_NON_TERMINAL_2022;

+ while (*source < sourceLimit) {

+ c = *(*source)++;

+ _this->toUBytes[_this->toULength++]=(uint8_t)c;

+ value = getKey_2022(c,(int32_t *) &key, &offset);

+ switch (value){

+ case VALID_NON_TERMINAL_2022 :

+ /* continue with the loop */

+ break;

+ case VALID_TERMINAL_2022:

+ key = 0;

+ goto DONE;

+ case INVALID_2022:

+ goto DONE;

+ case VALID_MAYBE_TERMINAL_2022:

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ /* ESC ( B is ambiguous only for ISO_2022 itself */

+ if(var == ISO_2022) {

+ /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */

+ _this->toULength = 0;

+ /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */

+ /* continue with the loop */

+ value = VALID_NON_TERMINAL_2022;

+ break;

+ } else

+#endif

+ {

+ /* not ISO_2022 itself, finish here */

+ value = VALID_TERMINAL_2022;

+ key = 0;

+ goto DONE;

+ }

+DONE:

+ myData2022->key = key;

+ if (value == VALID_NON_TERMINAL_2022) {

+ /* indicate that the escape sequence is incomplete: key!=0 */

+ return;

+ } else if (value == INVALID_2022 ) {

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ } else /* value == VALID_TERMINAL_2022 */ {

+ switch(var){

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ case ISO_2022:

+ {

+ const char *chosenConverterName = escSeqStateTable_Result_2022[offset];

+ if(chosenConverterName == NULL) {

+ /* SS2 or SS3 */

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ _this->toUCallbackReason = UCNV_UNASSIGNED;

+ return;

+ }

+ _this->mode = UCNV_SI;

+ ucnv_close(myData2022->currentConverter);

+ myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err); /* NOTE(review): myUConverter is not among the locals declared at the top of this function as shown here */

+ if(U_SUCCESS(*err)) {

+ myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;

+ _this->mode = UCNV_SO;

+ }

+ break;

+ }

+#endif

+ case ISO_2022_JP:

+ {

+ StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];

+ switch(tempState) {

+ case INVALID_STATE:

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ break;

+ case SS2_STATE:

+ if(myData2022->toU2022State.cs[2]!=0) {

+ if(myData2022->toU2022State.g<2) {

+ myData2022->toU2022State.prevG=myData2022->toU2022State.g; /* remember the locking shift (G0/G1) in effect before the single shift */

+ }

+ myData2022->toU2022State.g=2;

+ } else {

+ /* illegal to have SS2 before a matching designator */

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ }

+ break;

+ /* case SS3_STATE: not used in ISO-2022-JP-x */

+ case ISO8859_1:

+ case ISO8859_7:

+ if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ } else {

+ /* G2 charset for SS2 */

+ myData2022->toU2022State.cs[2]=(int8_t)tempState;

+ }

+ break;

+ default:

+ if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ } else {

+ /* G0 charset */

+ myData2022->toU2022State.cs[0]=(int8_t)tempState;

+ }

+ break;

+ }

+ break;

+ case ISO_2022_CN:

+ {

+ StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];

+ switch(tempState) {

+ case INVALID_STATE:

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ break;

+ case SS2_STATE:

+ if(myData2022->toU2022State.cs[2]!=0) {

+ if(myData2022->toU2022State.g<2) {

+ myData2022->toU2022State.prevG=myData2022->toU2022State.g;

+ }

+ myData2022->toU2022State.g=2;

+ } else {

+ /* illegal to have SS2 before a matching designator */

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ }

+ break;

+ case SS3_STATE:

+ if(myData2022->toU2022State.cs[3]!=0) {

+ if(myData2022->toU2022State.g<2) {

+ myData2022->toU2022State.prevG=myData2022->toU2022State.g;

+ }

+ myData2022->toU2022State.g=3;

+ } else {

+ /* illegal to have SS3 before a matching designator */

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ }

+ break;

+ case ISO_IR_165:

+ if(myData2022->version==0) {

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ break;

+ }

+ /*fall through*/

+ case GB2312_1:

+ /*fall through*/

+ case CNS_11643_1:

+ myData2022->toU2022State.cs[1]=(int8_t)tempState; /* these three charsets are designated to G1 */

+ break;

+ case CNS_11643_2:

+ myData2022->toU2022State.cs[2]=(int8_t)tempState; /* G2 charset for SS2 */

+ break;

+ default:

+ /* other CNS 11643 planes */

+ if(myData2022->version==0) {

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ } else {

+ myData2022->toU2022State.cs[3]=(int8_t)tempState; /* G3 charset for SS3 */

+ }

+ break;

+ }

+ break;

+ case ISO_2022_KR:

+ if(offset==0x30){ /* escSeqStateTable_Key_2022[0x30] is 37642, the key of ESC $ ) C */

+ /* nothing to be done, just accept this one escape sequence */

+ } else {

+ *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

+ }

+ break;

+ default:

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ break;

+ }

+ if(U_SUCCESS(*err)) {

+ _this->toULength = 0;

+ } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {

+ if(_this->toULength>1) {

+ /*

+ * Ticket 5691: consistent illegal sequences:

+ * - We include at least the first byte (ESC) in the illegal sequence.

+ * - If any of the non-initial bytes could be the start of a character,

+ * we stop the illegal sequence before the first one of those.

+ * In escape sequences, all following bytes are "printable", that is,

+ * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),

+ * they are valid single/lead bytes.

+ * For simplicity, we always only report the initial ESC byte as the

+ * illegal sequence and back out all other bytes we looked at.

+ */

+ /* Back out some bytes. */

+ int8_t backOutDistance=_this->toULength-1;

+ int8_t bytesFromThisBuffer=_this->toULength-initialToULength;

+ if(backOutDistance<=bytesFromThisBuffer) {

+ /* same as initialToULength<=1 */

+ *source-=backOutDistance;

+ } else {

+ /* Back out bytes from the previous buffer: Need to replay them. */

+ _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);

+ /* same as -(initialToULength-1) */

+ /* preToULength is negative! */

+ uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);

+ *source-=bytesFromThisBuffer;

+ }

+ _this->toULength=1;

+ }

+ } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {

+ _this->toUCallbackReason = UCNV_UNASSIGNED;

+ }

/*
 * Looks ahead in the input to find the longest run of bytes, starting at
 * *source, that can be converted without an intervening escape sequence.
 *
 * @param source      address of the current read position; *source is only
 *                    read, never modified
 * @param sourceLimit one past the last available input byte
 * @param flush       only consulted by the generic ISO-2022 variant: when
 *                    FALSE and the input ends inside an escape sequence that
 *                    is still a valid prefix, sourceLimit is returned
 * @return pointer to the first ESC byte at or after *source, or sourceLimit
 *         if the remaining input contains no ESC byte
 */
static U_INLINE const char*
getEndOfBuffer_2022(const char** source,
                    const char* sourceLimit,
                    UBool flush){
    const char* mySource = *source;

#ifdef U_ENABLE_GENERIC_ISO_2022
    if (*source >= sourceLimit) {
        return sourceLimit;
    }

    do{
        if (*mySource == ESC_2022){
            int8_t i;
            int32_t key = 0;
            int32_t offset;
            UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;

            /* Kludge: I could not
             * figure out the reason for validating an escape sequence
             * twice - once here and once in changeState_2022().
             * is it possible to have an ESC character in a ISO2022
             * byte stream which is valid in a code page? Is it legal?
             */
            for (i=0;
                 (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
                 i++) {
                value = getKey_2022(*(mySource+i), &key, &offset);
            }
            /* *mySource is ESC here, so this branch is always taken */
            if (value > 0 || *mySource==ESC_2022) {
                return mySource;
            }

            if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) {
                return sourceLimit;
            }
        }
    }while (++mySource < sourceLimit);

    return sourceLimit;
#else
    (void)flush; /* not needed when only scanning for the next ESC byte */
    while(mySource < sourceLimit && *mySource != ESC_2022) {
        ++mySource;
    }
    return mySource;
#endif
}

+/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c

+ * any future change in _MBCSFromUChar32() function should be reflected here.

+ * @return number of bytes in *value; negative number if fallback; 0 if no mapping

+ */

+static U_INLINE int32_t

+MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,

+ UChar32 c,

+ uint32_t* value,

+ UBool useFallback,

+ int outputType)

+ const int32_t *cx;

+ const uint16_t *table;

+ uint32_t stage2Entry;

+ uint32_t myValue;

+ int32_t length;

+ const uint8_t *p;

+ /*

+ * TODO(markus): Use and require new, faster MBCS conversion table structures.

+ * Use internal version of ucnv_open() that verifies that the new structures are available,

+ * else U_INTERNAL_PROGRAM_ERROR.

+ */

+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */

+ if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {

+ table=sharedData->mbcs.fromUnicodeTable;

+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);

+ /* get the bytes and the length for the output */

+ if(outputType==MBCS_OUTPUT_2){

+ myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);

+ if(myValue<=0xff) {

+ length=1;

+ } else {

+ length=2;

+ }

+ } else /* outputType==MBCS_OUTPUT_3 */ {

+ p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);

+ myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];

+ if(myValue<=0xff) {

+ length=1;

+ } else if(myValue<=0xffff) {

+ length=2;

+ } else {

+ length=3;

+ }

+ /* is this code point assigned, or do we use fallbacks? */

+ if((stage2Entry&(1<<(16+(c&0xf))))!=0) {

+ /* assigned */

+ *value=myValue;

+ return length;

+ } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {

+ /*

+ * We allow a 0 byte output if the "assigned" bit is set for this entry.

+ * There is no way with this data structure for fallback output

+ * to be a zero byte.

+ */

+ *value=myValue;

+ return -length;

+ }

+ cx=sharedData->mbcs.extIndexes;

+ if(cx!=NULL) {

+ return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);

+ }

+ /* unassigned */

+ return 0;

+/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c

+ * any future change in _MBCSSingleFromUChar32() function should be reflected here.

+ * @param retval pointer to output byte

+ * @return 1 roundtrip byte 0 no mapping -1 fallback byte

+ */

+static U_INLINE int32_t

+MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,

+ UChar32 c,

+ uint32_t* retval,

+ UBool useFallback)

+ const uint16_t *table;

+ int32_t value;

+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */

+ if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {

+ return 0;

+ }

+ /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */

+ table=sharedData->mbcs.fromUnicodeTable;

+ /* get the byte for the output */

+ value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);

+ /* is this code point assigned, or do we use fallbacks? */

+ *retval=(uint32_t)(value&0xff);

+ if(value>=0xf00) {

+ return 1; /* roundtrip */

+ } else if(useFallback ? value>=0x800 : value>=0xc00) {

+ return -1; /* fallback taken */

+ } else {

+ return 0; /* no mapping */

+ }

/*
 * Check that the result is a 2-byte value with each byte in the range A1..FE
 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
 * to move it to the ISO 2022 range 21..7E.
 * Return 0 if out of range.
 *
 * Only the low 16 bits take part in the range checks (the casts truncate),
 * so callers are expected to pass 2-byte values.
 */
static U_INLINE uint32_t
_2022FromGR94DBCS(uint32_t value) {
    if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&  /* lead byte (and pair) in range */
        (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)            /* trail byte in A1..FE */
    ) {
        return value - 0x8080; /* shift down to 21..7e byte range */
    } else {
        return 0; /* not valid for ISO 2022 */
    }
}

#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
/*
 * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
 * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
 * unchanged.
 */
static U_INLINE uint32_t
_2022ToGR94DBCS(uint32_t value) {
    uint32_t returnValue = value + 0x8080;
    if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
        (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
        return returnValue;
    } else {
        return value;
    }
}
#endif

#ifdef U_ENABLE_GENERIC_ISO_2022

/**********************************************************************************
*  ISO-2022 Converter
*/

/*
 * Generic ISO-2022 to-Unicode conversion (only compiled when
 * U_ENABLE_GENERIC_ISO_2022 is defined).
 *
 * The input is processed in alternating steps: the bytes up to the next
 * escape sequence (as found by getEndOfBuffer_2022()) are converted with
 * myData->currentConverter via ucnv_toUnicode(), then the escape sequence
 * itself is consumed by changeState_2022().
 * If no sub-converter is set yet, an "ASCII" converter is opened.
 *
 * The function returns early - leaving the remaining work to the calling
 * framework - on any error, on target overflow (after moving the
 * sub-converter's overflow buffer into this converter), at the end of the
 * input, and, when offsets are requested, after every step that consumed
 * input or produced output.
 *
 * @param args conversion arguments; source/target are advanced in place
 * @param err  in/out error code
 */
static void
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
                                              UErrorCode* err){
    const char* mySourceLimit, *realSourceLimit;
    const char* sourceStart;
    const UChar* myTargetStart;
    UConverter* saveThis;
    UConverterDataISO2022* myData;
    int8_t length;

    saveThis = args->converter;
    myData=((UConverterDataISO2022*)(saveThis->extraInfo));

    realSourceLimit = args->sourceLimit;
    while (args->source < realSourceLimit) {
        if(myData->key == 0) { /* are we in the middle of an escape sequence? */
            /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
            mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);

            if(args->source < mySourceLimit) {
                if(myData->currentConverter==NULL) {
                    myData->currentConverter = ucnv_open("ASCII",err);
                    if(U_FAILURE(*err)){
                        return;
                    }

                    myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
                    saveThis->mode = UCNV_SO;
                }

                /* convert to before the ESC or until the end of the buffer */
                myData->isFirstBuffer=FALSE;
                sourceStart = args->source;
                myTargetStart = args->target;
                args->converter = myData->currentConverter;
                ucnv_toUnicode(args->converter,
                    &args->target,
                    args->targetLimit,
                    &args->source,
                    mySourceLimit,
                    args->offsets,
                    (UBool)(args->flush && mySourceLimit == realSourceLimit),
                    err);
                args->converter = saveThis;

                if (*err == U_BUFFER_OVERFLOW_ERROR) {
                    /* move the overflow buffer */
                    length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
                    myData->currentConverter->UCharErrorBufferLength = 0;
                    if(length > 0) {
                        uprv_memcpy(saveThis->UCharErrorBuffer,
                                    myData->currentConverter->UCharErrorBuffer,
                                    length*U_SIZEOF_UCHAR);
                    }
                    return;
                }

                /*
                 * At least one of:
                 * -Error while converting
                 * -Done with entire buffer
                 * -Need to write offsets or update the current offset
                 *  (leave that up to the code in ucnv.c)
                 *
                 * or else we just stopped at an ESC byte and continue with changeState_2022()
                 */
                if (U_FAILURE(*err) ||
                    (args->source == realSourceLimit) ||
                    (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart)) ||
                    (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0)
                ) {
                    /* copy partial or error input for truncated detection and error handling */
                    if(U_FAILURE(*err)) {
                        length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
                        if(length > 0) {
                            uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
                        }
                    } else {
                        length = saveThis->toULength = myData->currentConverter->toULength;
                        if(length > 0) {
                            uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
                            if(args->source < mySourceLimit) {
                                *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
                            }
                        }
                    }
                    return;
                }
            }
        }

        sourceStart = args->source;
        changeState_2022(args->converter,
               &(args->source),
               realSourceLimit,
               ISO_2022,
               err);
        if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
            /* let the ucnv.c code update its current offset */
            return;
        }
    }
}

#endif

+/*

+ * To Unicode Callback helper function

+ */

+static void

+toUnicodeCallback(UConverter *cnv,

+ const uint32_t sourceChar, const uint32_t targetUniChar,

+ UErrorCode* err){

+ if(sourceChar>0xff){

+ cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);

+ cnv->toUBytes[1] = (uint8_t)sourceChar;

+ cnv->toULength = 2;

+ }

+ else{

+ cnv->toUBytes[0] =(char) sourceChar;

+ cnv->toULength = 1;

+ }

+ if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){

+ *err = U_INVALID_CHAR_FOUND;

+ }

+ else{

+ *err = U_ILLEGAL_CHAR_FOUND;

+ }

+/**************************************ISO-2022-JP*************************************************/

+/************************************** IMPORTANT **************************************************

+* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and

+* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().

+* The converter iterates over each Unicode codepoint

+* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is

+* processed one char at a time it would make sense to reduce the extra processing a canned converter

+* would do as far as possible.

+* If the implementation of these macros or structure of sharedData struct change in the future, make

+* sure that ISO-2022 is also changed.

+***************************************************************************************************

+*/

+/***************************************************************************************************

+* Rules for ISO-2022-jp encoding

+* (i) Escape sequences must be fully contained within a line they should not

+* span new lines or CRs

+* (ii) If the last character on a line is represented by two bytes then an ASCII or

+* JIS-Roman character escape sequence should follow before the line terminates

+* (iii) If the first character on the line is represented by two bytes then a two

+* byte character escape sequence should precede it

+* (iv) If no escape sequence is encountered then the characters are ASCII

+* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,

+* and invoked with SS2 (ESC N).

+* (vi) If there is any G0 designation in text, there must be a switch to

+* ASCII or to JIS X 0201-Roman before a space character (but not

+* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control

+* characters such as tab or CRLF.

+* (vii) Supported encodings:

+* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7

+* source : RFC-1554

+* JISX201, JISX208,JISX212 : new .cnv data files created

+* KSC5601 : alias to ibm-949 mapping table

+* GB2312 : alias to ibm-1386 mapping table

+* ISO-8859-1 : Algorithmic implemented as LATIN1 case

+* ISO-8859-7 : alias to ibm-9409 mapping table

+*/

+/*
+ * preference order of JP charsets
+ *
+ * UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC() walks this list in order
+ * when it builds its choices[]: after the current G0 and G2 charsets, every
+ * charset listed here that is still allowed by the version's charset mask is
+ * appended, so earlier entries are tried first.
+ */
+static const StateEnum jpCharsetPref[]={
+ ASCII,
+ JISX201,
+ ISO8859_1,
+ ISO8859_7,
+ JISX208,
+ JISX212,
+ GB2312,
+ KSC5601,
+ HWKANA_7BIT
+};

+/*
+ * Designation escape sequences, one per charset.
+ * The from-Unicode code indexes this table directly with the charset value
+ * (escSeqChars[cs]) and copies escSeqCharsLen[cs] bytes from the entry.
+ *
+ * The escape sequences must be in order of the enum constants like JISX201 = 3,
+ * not in order of jpCharsetPref[]!
+ */
+static const char escSeqChars[][6] ={
+ "\x1B\x28\x42", /* <ESC>(B ASCII */
+ "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
+ "\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */
+ "\x1B\x28\x4A", /* <ESC>(J JISX-201 */
+ "\x1B\x24\x42", /* <ESC>$B JISX-208 */
+ "\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */
+ "\x1B\x24\x41", /* <ESC>$A GB2312 */
+ "\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */
+ "\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */
+};

+/*
+ * Byte length of each escape sequence in escSeqChars[], in the same order
+ * and indexed the same way (escSeqCharsLen[cs]).
+ * Keep the two tables in sync when adding or reordering entries.
+ */
+static const int8_t escSeqCharsLen[] ={
+ 3, /* length of <ESC>(B ASCII */
+ 3, /* length of <ESC>.A ISO-8859-1 */
+ 3, /* length of <ESC>.F ISO-8859-7 */
+ 3, /* length of <ESC>(J JISX-201 */
+ 3, /* length of <ESC>$B JISX-208 */
+ 4, /* length of <ESC>$(D JISX-212 */
+ 3, /* length of <ESC>$A GB2312 */
+ 4, /* length of <ESC>$(C KSC5601 */
+ 3 /* length of <ESC>(I HWKANA_7BIT */
+};

+/*

+* The iteration over various code pages works this way:

+* i) Get the currentState from myConverterData->currentState

+* ii) Check if the character is mapped to a valid character in the currentState

+* Yes -> a) set the initIterState to currentState

+* b) remain in this state until an invalid character is found

+* No -> a) go to the next code page and find the character

+* iii) Before changing the state (incrementing the current state), check if the current state

+* is equal to the initIterState

+* Yes -> A character that cannot be represented in any of the supported encodings

+* break and return a U_INVALID_CHARACTER error

+* No -> Continue and find the character in next code page

+* TODO: Implement a priority technique where the users are allowed to set the priority of code pages

+*/

/*
 * Map 00..7F to Unicode according to JIS X 0201.
 * Identical to ASCII except for two bytes:
 * 5C maps to U+00A5 and 7E maps to U+203E.
 *
 * @param value byte value, expected to be in 00..7F (not checked)
 * @return the Unicode code point
 */
static U_INLINE uint32_t
jisx201ToU(uint32_t value) {
    if(value < 0x5c) {
        return value;
    } else if(value == 0x5c) {
        return 0xa5;
    } else if(value == 0x7e) {
        return 0x203e;
    } else /* value <= 0x7f */ {
        return value;
    }
}

/*
 * Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable.
 * U+005C and U+007E are unmappable because their byte values are taken by
 * U+00A5 (5C) and U+203E (7E).
 *
 * @param value Unicode code point
 * @return byte value 00..7F, or 0xfffe if there is no mapping
 */
static U_INLINE uint32_t
jisx201FromU(uint32_t value) {
    if(value<=0x7f) {
        if(value!=0x5c && value!=0x7e) {
            return value;
        }
    } else if(value==0xa5) {
        return 0x5c;
    } else if(value==0x203e) {
        return 0x7e;
    }
    return 0xfffe;
}

/*
 * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
 * to JIS X 0208, and convert it to a pair of 21..7E bytes.
 * Return 0 if the byte pair is out of range.
 *
 * @param value Shift-JIS byte pair packed as (lead<<8)|trail; must already be
 *              a valid Shift-JIS pair, only the upper bound is checked here
 * @return the JIS X 0208 pair packed the same way, or 0 if value > 0xEFFC
 */
static U_INLINE uint32_t
_2022FromSJIS(uint32_t value) {
    uint8_t trail;

    if(value > 0xEFFC) {
        return 0;  /* beyond JIS X 0208 */
    }

    trail = (uint8_t)value;

    value &= 0xff00;  /* lead byte */
    if(value <= 0x9f00) {
        value -= 0x7000;
    } else /* 0xe000 <= value <= 0xef00 */ {
        value -= 0xb000;
    }
    /* each Shift-JIS lead byte covers two JIS rows */
    value <<= 1;

    if(trail <= 0x9e) {
        /* first (odd) row of the pair */
        value -= 0x100;
        if(trail <= 0x7e) {
            value |= trail - 0x1f;
        } else {
            value |= trail - 0x20;
        }
    } else /* trail <= 0xfc */ {
        /* second (even) row of the pair */
        value |= trail - 0x7e;
    }
    return value;
}

/*
 * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
 * If either byte is outside 21..7E make sure that the result is not valid
 * for Shift-JIS so that the converter catches it.
 * Some invalid byte values already turn into equally invalid Shift-JIS
 * byte values and need not be tested explicitly.
 *
 * @param c1    first (row) byte of the JIS X 0208 pair
 * @param c2    second (cell) byte of the JIS X 0208 pair
 * @param bytes receives the Shift-JIS lead byte in bytes[0] and the trail
 *              byte in bytes[1]; a byte detected as invalid is stored as 0
 */
static U_INLINE void
_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
    if(c1&1) {
        /* odd row: trail bytes 40..7E and 80..9E */
        ++c1;
        if(c2 <= 0x5f) {
            c2 += 0x1f;
        } else if(c2 <= 0x7e) {
            c2 += 0x20;
        } else {
            c2 = 0;  /* invalid */
        }
    } else {
        /* even row: trail bytes 9F..FC */
        if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
            c2 += 0x7e;
        } else {
            c2 = 0;  /* invalid */
        }
    }
    /* two JIS rows share one Shift-JIS lead byte */
    c1 >>= 1;
    if(c1 <= 0x2f) {
        c1 += 0x70;
    } else if(c1 <= 0x3f) {
        c1 += 0xb0;
    } else {
        c1 = 0;  /* invalid */
    }
    bytes[0] = (char)c1;
    bytes[1] = (char)c2;
}

+/*
+ * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
+ * Katakana.
+ * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
+ * because Shift-JIS roundtrips half-width Katakana to single bytes.
+ * These were the only fallbacks in ICU's jisx-208.ucm file.
+ *
+ * The table is indexed with (code point - HWKANA_START); each entry is the
+ * two-byte JIS X 0208 value that the from-Unicode code stores as its
+ * fallback result for that code point.
+ */
+static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
+ 0x2123, /* U+FF61 */
+ 0x2156,
+ 0x2157,
+ 0x2122,
+ 0x2126,
+ 0x2572,
+ 0x2521,
+ 0x2523,
+ 0x2525,
+ 0x2527,
+ 0x2529,
+ 0x2563,
+ 0x2565,
+ 0x2567,
+ 0x2543,
+ 0x213C, /* U+FF70 */
+ 0x2522,
+ 0x2524,
+ 0x2526,
+ 0x2528,
+ 0x252A,
+ 0x252B,
+ 0x252D,
+ 0x252F,
+ 0x2531,
+ 0x2533,
+ 0x2535,
+ 0x2537,
+ 0x2539,
+ 0x253B,
+ 0x253D,
+ 0x253F, /* U+FF80 */
+ 0x2541,
+ 0x2544,
+ 0x2546,
+ 0x2548,
+ 0x254A,
+ 0x254B,
+ 0x254C,
+ 0x254D,
+ 0x254E,
+ 0x254F,
+ 0x2552,
+ 0x2555,
+ 0x2558,
+ 0x255B,
+ 0x255E,
+ 0x255F, /* U+FF90 */
+ 0x2560,
+ 0x2561,
+ 0x2562,
+ 0x2564,
+ 0x2566,
+ 0x2568,
+ 0x2569,
+ 0x256A,
+ 0x256B,
+ 0x256C,
+ 0x256D,
+ 0x256F,
+ 0x2573,
+ 0x212B,
+ 0x212C /* U+FF9F */
+};

+static void

+UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {

+ UConverter *cnv = args->converter;

+ UConverterDataISO2022 *converterData;

+ ISO2022State *pFromU2022State;

+ uint8_t *target = (uint8_t *) args->target;

+ const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;

+ const UChar* source = args->source;

+ const UChar* sourceLimit = args->sourceLimit;

+ int32_t* offsets = args->offsets;

+ UChar32 sourceChar;

+ char buffer[8];

+ int32_t len, outLen;

+ int8_t choices[10];

+ int32_t choiceCount;

+ uint32_t targetValue = 0;

+ UBool useFallback;

+ int32_t i;

+ int8_t cs, g;

+ /* set up the state */

+ converterData = (UConverterDataISO2022*)cnv->extraInfo;

+ pFromU2022State = &converterData->fromU2022State;

+ choiceCount = 0;

+ /* check if the last codepoint of previous buffer was a lead surrogate*/

+ if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {

+ goto getTrail;

+ }

+ while(source < sourceLimit) {

+ if(target < targetLimit) {

+ sourceChar = *(source++);

+ /*check if the char is a First surrogate*/

+ if(UTF_IS_SURROGATE(sourceChar)) {

+ if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

+getTrail:

+ /*look ahead to find the trail surrogate*/

+ if(source < sourceLimit) {

+ /* test the following code unit */

+ UChar trail=(UChar) *source;

+ if(UTF_IS_SECOND_SURROGATE(trail)) {

+ source++;

+ sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

+ cnv->fromUChar32=0x00;

+ /* convert this supplementary code point */

+ /* exit this condition tree */

+ } else {

+ /* this is an unmatched lead code unit (1st surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ } else {

+ /* no more input */

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ } else {

+ /* this is an unmatched trail code unit (2nd surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ /* do not convert SO/SI/ESC */

+ if(IS_2022_CONTROL(sourceChar)) {

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ /* do the conversion */

+ if(choiceCount == 0) {

+ uint16_t csm;

+ /*

+ * The csm variable keeps track of which charsets are allowed

+ * and not used yet while building the choices[].

+ */

+ csm = jpCharsetMasks[converterData->version];

+ choiceCount = 0;

+ /* JIS7/8: try single-byte half-width Katakana before JISX208 */

+ if(converterData->version == 3 || converterData->version == 4) {

+ choices[choiceCount++] = (int8_t)HWKANA_7BIT;

+ }

+ /* Do not try single-byte half-width Katakana for other versions. */

+ csm &= ~CSM(HWKANA_7BIT);

+ /* try the current G0 charset */

+ choices[choiceCount++] = cs = pFromU2022State->cs[0];

+ csm &= ~CSM(cs);

+ /* try the current G2 charset */

+ if((cs = pFromU2022State->cs[2]) != 0) {

+ choices[choiceCount++] = cs;

+ csm &= ~CSM(cs);

+ }

+ /* try all the other possible charsets */

+ for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {

+ cs = (int8_t)jpCharsetPref[i];

+ if(CSM(cs) & csm) {

+ choices[choiceCount++] = cs;

+ csm &= ~CSM(cs);

+ }

+ cs = g = 0;

+ /*

+ * len==0: no mapping found yet

+ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks

+ * len>0: found a roundtrip result, done

+ */

+ len = 0;

+ /*

+ * We will turn off useFallback after finding a fallback,

+ * but we still get fallbacks from PUA code points as usual.

+ * Therefore, we will also need to check that we don't overwrite

+ * an early fallback with a later one.

+ */

+ useFallback = cnv->useFallback;

+ for(i = 0; i < choiceCount && len <= 0; ++i) {

+ uint32_t value;

+ int32_t len2;

+ int8_t cs0 = choices[i];

+ switch(cs0) {

+ case ASCII:

+ if(sourceChar <= 0x7f) {

+ targetValue = (uint32_t)sourceChar;

+ len = 1;

+ cs = cs0;

+ g = 0;

+ }

+ break;

+ case ISO8859_1:

+ if(GR96_START <= sourceChar && sourceChar <= GR96_END) {

+ targetValue = (uint32_t)sourceChar - 0x80;

+ len = 1;

+ cs = cs0;

+ g = 2;

+ }

+ break;

+ case HWKANA_7BIT:

+ if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {

+ if(converterData->version==3) {

+ /* JIS7: use G1 (SO) */

+ /* Shift U+FF61..U+FF9F to bytes 21..5F. */

+ targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));

+ len = 1;

+ pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */

+ g = 1;

+ } else if(converterData->version==4) {

+ /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */

+ /* Shift U+FF61..U+FF9F to bytes A1..DF. */

+ targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));

+ len = 1;

+ cs = pFromU2022State->cs[0];

+ if(IS_JP_DBCS(cs)) {

+ /* switch from a DBCS charset to JISX201 */

+ cs = (int8_t)JISX201;

+ }

+ /* else stay in the current G0 charset */

+ g = 0;

+ }

+ /* else do not use HWKANA_7BIT with other versions */

+ }

+ break;

+ case JISX201:

+ /* G0 SBCS */

+ value = jisx201FromU(sourceChar);

+ if(value <= 0x7f) {

+ targetValue = value;

+ len = 1;

+ cs = cs0;

+ g = 0;

+ useFallback = FALSE;

+ }

+ break;

+ case JISX208:

+ /* G0 DBCS from Shift-JIS table */

+ len2 = MBCS_FROM_UCHAR32_ISO2022(

+ converterData->myConverterArray[cs0],

+ sourceChar, &value,

+ useFallback, MBCS_OUTPUT_2);

+ if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

+ value = _2022FromSJIS(value);

+ if(value != 0) {

+ targetValue = value;

+ len = len2;

+ cs = cs0;

+ g = 0;

+ useFallback = FALSE;

+ }

+ } else if(len == 0 && useFallback &&

+ (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {

+ targetValue = hwkana_fb[sourceChar - HWKANA_START];

+ len = -2;

+ cs = cs0;

+ g = 0;

+ useFallback = FALSE;

+ }

+ break;

+ case ISO8859_7:

+ /* G0 SBCS forced to 7-bit output */

+ len2 = MBCS_SINGLE_FROM_UCHAR32(

+ converterData->myConverterArray[cs0],

+ sourceChar, &value,

+ useFallback);

+ if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {

+ targetValue = value - 0x80;

+ len = len2;

+ cs = cs0;

+ g = 2;

+ useFallback = FALSE;

+ }

+ break;

+ default:

+ /* G0 DBCS */

+ len2 = MBCS_FROM_UCHAR32_ISO2022(

+ converterData->myConverterArray[cs0],

+ sourceChar, &value,

+ useFallback, MBCS_OUTPUT_2);

+ if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

+ if(cs0 == KSC5601) {

+ /*

+ * Check for valid bytes for the encoding scheme.

+ * This is necessary because the sub-converter (windows-949)

+ * has a broader encoding scheme than is valid for 2022.

+ */

+ value = _2022FromGR94DBCS(value);

+ if(value == 0) {

+ break;

+ }

+ targetValue = value;

+ len = len2;

+ cs = cs0;

+ g = 0;

+ useFallback = FALSE;

+ }

+ break;

+ }

+ if(len != 0) {

+ if(len < 0) {

+ len = -len; /* fallback */

+ }

+ outLen = 0; /* count output bytes */

+ /* write SI if necessary (only for JIS7) */

+ if(pFromU2022State->g == 1 && g == 0) {

+ buffer[outLen++] = UCNV_SI;

+ pFromU2022State->g = 0;

+ }

+ /* write the designation sequence if necessary */

+ if(cs != pFromU2022State->cs[g]) {

+ int32_t escLen = escSeqCharsLen[cs];

+ uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);

+ outLen += escLen;

+ pFromU2022State->cs[g] = cs;

+ /* invalidate the choices[] */

+ choiceCount = 0;

+ }

+ /* write the shift sequence if necessary */

+ if(g != pFromU2022State->g) {

+ switch(g) {

+ /* case 0 handled before writing escapes */

+ case 1:

+ buffer[outLen++] = UCNV_SO;

+ pFromU2022State->g = 1;

+ break;

+ default: /* case 2 */

+ buffer[outLen++] = 0x1b;

+ buffer[outLen++] = 0x4e;

+ break;

+ /* no case 3: no SS3 in ISO-2022-JP-x */

+ }

+ /* write the output bytes */

+ if(len == 1) {

+ buffer[outLen++] = (char)targetValue;

+ } else /* len == 2 */ {

+ buffer[outLen++] = (char)(targetValue >> 8);

+ buffer[outLen++] = (char)targetValue;

+ }

+ } else {

+ /*

+ * if we cannot find the character after checking all codepages

+ * then this is an error

+ */

+ *err = U_INVALID_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ if(sourceChar == CR || sourceChar == LF) {

+ /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */

+ pFromU2022State->cs[2] = 0;

+ choiceCount = 0;

+ }

+ /* output outLen>0 bytes in buffer[] */

+ if(outLen == 1) {

+ *target++ = buffer[0];

+ if(offsets) {

+ *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */

+ }

+ } else if(outLen == 2 && (target + 2) <= targetLimit) {

+ *target++ = buffer[0];

+ *target++ = buffer[1];

+ if(offsets) {

+ int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));

+ *offsets++ = sourceIndex;

+ }

+ } else {

+ fromUWriteUInt8(

+ cnv,

+ buffer, outLen,

+ &target, (const char *)targetLimit,

+ &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),

+ err);

+ if(U_FAILURE(*err)) {

+ break;

+ }

+ } /* end if(myTargetIndex<myTargetLength) */

+ else{

+ *err =U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+ }/* end while(mySourceIndex<mySourceLength) */

+ /*

+ * the end of the input stream and detection of truncated input

+ * are handled by the framework, but for ISO-2022-JP conversion

+ * we need to be in ASCII mode at the very end

+ *

+ * conditions:

+ * successful

+ * in SO mode or not in ASCII mode

+ * end of input and no truncated input

+ */

+ if( U_SUCCESS(*err) &&

+ (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&

+ args->flush && source>=sourceLimit && cnv->fromUChar32==0

+ ) {

+ int32_t sourceIndex;

+ outLen = 0;

+ if(pFromU2022State->g != 0) {

+ buffer[outLen++] = UCNV_SI;

+ pFromU2022State->g = 0;

+ }

+ if(pFromU2022State->cs[0] != ASCII) {

+ int32_t escLen = escSeqCharsLen[ASCII];

+ uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);

+ outLen += escLen;

+ pFromU2022State->cs[0] = (int8_t)ASCII;

+ }

+ /* get the source index of the last input character */

+ /*

+ * TODO this would be simpler and more reliable if we used a pair

+ * of sourceIndex/prevSourceIndex like in ucnvmbcs.c

+ * so that we could simply use the prevSourceIndex here;

+ * this code gives an incorrect result for the rare case of an unmatched

+ * trail surrogate that is alone in the last buffer of the text stream

+ */

+ sourceIndex=(int32_t)(source-args->source);

+ if(sourceIndex>0) {

+ --sourceIndex;

+ if( U16_IS_TRAIL(args->source[sourceIndex]) &&

+ (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))

+ ) {

+ --sourceIndex;

+ }

+ } else {

+ sourceIndex=-1;

+ }

+ fromUWriteUInt8(

+ cnv,

+ buffer, outLen,

+ &target, (const char *)targetLimit,

+ &offsets, sourceIndex,

+ err);

+ }

+ /*save the state and return */

+ args->source = source;

+ args->target = (char*)target;

+/*************** to unicode *******************/

+static void

+UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

+ UErrorCode* err){

+ char tempBuf[2];

+ const char *mySource = (char *) args->source;

+ UChar *myTarget = args->target;

+ const char *mySourceLimit = args->sourceLimit;

+ uint32_t targetUniChar = 0x0000;

+ uint32_t mySourceChar = 0x0000;

+ uint32_t tmpSourceChar = 0x0000;

+ UConverterDataISO2022* myData;

+ ISO2022State *pToU2022State;

+ StateEnum cs;

+ myData=(UConverterDataISO2022*)(args->converter->extraInfo);

+ pToU2022State = &myData->toU2022State;

+ if(myData->key != 0) {

+ /* continue with a partial escape sequence */

+ goto escape;

+ } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {

+ /* continue with a partial double-byte character */

+ mySourceChar = args->converter->toUBytes[0];

+ args->converter->toULength = 0;

+ cs = (StateEnum)pToU2022State->cs[pToU2022State->g];

+ targetUniChar = missingCharMarker;

+ goto getTrailByte;

+ }

+ while(mySource < mySourceLimit){

+ targetUniChar =missingCharMarker;

+ if(myTarget < args->targetLimit){

+ mySourceChar= (unsigned char) *mySource++;

+ switch(mySourceChar) {

+ case UCNV_SI:

+ if(myData->version==3) {

+ pToU2022State->g=0;

+ continue;

+ } else {

+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */

+ myData->isEmptySegment = FALSE; /* reset this, we have a different error */

+ break;

+ }

+ case UCNV_SO:

+ if(myData->version==3) {

+ /* JIS7: switch to G1 half-width Katakana */

+ pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;

+ pToU2022State->g=1;

+ continue;

+ } else {

+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */

+ myData->isEmptySegment = FALSE; /* reset this, we have a different error */

+ break;

+ }

+ case ESC_2022:

+ mySource--;

+escape:

+ {

+ const char * mySourceBefore = mySource;

+ int8_t toULengthBefore = args->converter->toULength;

+ changeState_2022(args->converter,&(mySource),

+ mySourceLimit, ISO_2022_JP,err);

+ /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */

+ if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ args->converter->toUCallbackReason = UCNV_IRREGULAR;

+ args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));

+ }

+ /* invalid or illegal escape sequence */

+ if(U_FAILURE(*err)){

+ args->target = myTarget;

+ args->source = mySource;

+ myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */

+ return;

+ }

+ /* If we successfully completed an escape sequence, we begin a new segment, empty so far */

+ if(myData->key==0) {

+ myData->isEmptySegment = TRUE;

+ }

+ continue;

+ /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */

+ case CR:

+ /*falls through*/

+ case LF:

+ /* automatically reset to single-byte mode */

+ if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {

+ pToU2022State->cs[0] = (int8_t)ASCII;

+ }

+ pToU2022State->cs[2] = 0;

+ pToU2022State->g = 0;

+ /* falls through */

+ default:

+ /* convert one or two bytes */

+ myData->isEmptySegment = FALSE;

+ cs = (StateEnum)pToU2022State->cs[pToU2022State->g];

+ if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&

+ !IS_JP_DBCS(cs)

+ ) {

+ /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */

+ targetUniChar = mySourceChar + (HWKANA_START - 0xa1);

+ /* return from a single-shift state to the previous one */

+ if(pToU2022State->g >= 2) {

+ pToU2022State->g=pToU2022State->prevG;

+ }

+ } else switch(cs) {

+ case ASCII:

+ if(mySourceChar <= 0x7f) {

+ targetUniChar = mySourceChar;

+ }

+ break;

+ case ISO8859_1:

+ if(mySourceChar <= 0x7f) {

+ targetUniChar = mySourceChar + 0x80;

+ }

+ /* return from a single-shift state to the previous one */

+ pToU2022State->g=pToU2022State->prevG;

+ break;

+ case ISO8859_7:

+ if(mySourceChar <= 0x7f) {

+ /* convert mySourceChar+0x80 to use a normal 8-bit table */

+ targetUniChar =

+ _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(

+ myData->myConverterArray[cs],

+ mySourceChar + 0x80);

+ }

+ /* return from a single-shift state to the previous one */

+ pToU2022State->g=pToU2022State->prevG;

+ break;

+ case JISX201:

+ if(mySourceChar <= 0x7f) {

+ targetUniChar = jisx201ToU(mySourceChar);

+ }

+ break;

+ case HWKANA_7BIT:

+ if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {

+ /* 7-bit halfwidth Katakana */

+ targetUniChar = mySourceChar + (HWKANA_START - 0x21);

+ }

+ break;

+ default:

+ /* G0 DBCS */

+ if(mySource < mySourceLimit) {

+ int leadIsOk, trailIsOk;

+ uint8_t trailByte;

+getTrailByte:

+ trailByte = (uint8_t)*mySource;

+ /*

+ * Ticket 5691: consistent illegal sequences:

+ * - We include at least the first byte in the illegal sequence.

+ * - If any of the non-initial bytes could be the start of a character,

+ * we stop the illegal sequence before the first one of those.

+ *

+ * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is

+ * an ESC/SO/SI, we report only the first byte as the illegal sequence.

+ * Otherwise we convert or report the pair of bytes.

+ */

+ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);

+ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);

+ if (leadIsOk && trailIsOk) {

+ ++mySource;

+ tmpSourceChar = (mySourceChar << 8) | trailByte;

+ if(cs == JISX208) {

+ _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);

+ mySourceChar = tmpSourceChar;

+ } else {

+ /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */

+ mySourceChar = tmpSourceChar;

+ if (cs == KSC5601) {

+ tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */

+ }

+ tempBuf[0] = (char)(tmpSourceChar >> 8);

+ tempBuf[1] = (char)(tmpSourceChar);

+ }

+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);

+ } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {

+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */

+ ++mySource;

+ /* add another bit so that the code below writes 2 bytes in case of error */

+ mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;

+ }

+ } else {

+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;

+ args->converter->toULength = 1;

+ goto endloop;

+ }

+ } /* End of inner switch */

+ break;

+ } /* End of outer switch */

+ if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){

+ if(args->offsets){

+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

+ }

+ *(myTarget++)=(UChar)targetUniChar;

+ }

+ else if(targetUniChar > missingCharMarker){

+ /* disassemble the surrogate pair and write to output*/

+ targetUniChar-=0x0010000;

+ *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));

+ if(args->offsets){

+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

+ }

+ ++myTarget;

+ if(myTarget< args->targetLimit){

+ *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

+ if(args->offsets){

+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

+ }

+ ++myTarget;

+ }else{

+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=

+ (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

+ }

+ else{

+ /* Call the callback function*/

+ toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);

+ break;

+ }

+ else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */

+ *err =U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+endloop:

+ args->target = myTarget;

+ args->source = mySource;

+/***************************************************************

+* Rules for ISO-2022-KR encoding

+* i) The KSC5601 designator sequence should appear only once in a file,

+* at the beginning of a line before any KSC5601 characters. This usually

+* means that it appears by itself on the first line of the file

+* ii) There are only 2 shifting sequences SO to shift into double byte mode

+* and SI to shift into single byte mode

+*
+* UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM (below) is the
+* from-Unicode path taken when the converter data's version is 1. It hands
+* the whole request to ucnv_MBCSFromUnicodeWithOffsets() by temporarily
+* substituting currentConverter for args->converter, then copies the pending
+* code point (fromUChar32) and, on U_BUFFER_OVERFLOW_ERROR, the overflow
+* bytes back to the public converter before restoring args->converter.

+*/

+static void

+UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){

+ UConverter* saveConv = args->converter; /* the public converter; put back into args before returning */

+ UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;

+ args->converter=myConverterData->currentConverter; /* the MBCS code works on the subconverter */

+ myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32; /* hand pending input to the subconverter */

+ ucnv_MBCSFromUnicodeWithOffsets(args,err);

+ saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32; /* copy pending input back to the public converter */

+ if(*err == U_BUFFER_OVERFLOW_ERROR) { /* move the subconverter's overflow bytes to the public converter */

+ if(myConverterData->currentConverter->charErrorBufferLength > 0) {

+ uprv_memcpy(

+ saveConv->charErrorBuffer,

+ myConverterData->currentConverter->charErrorBuffer,

+ myConverterData->currentConverter->charErrorBufferLength);

+ }

+ saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;

+ myConverterData->currentConverter->charErrorBufferLength = 0; /* the bytes now belong to the public converter */

+ }

+ args->converter=saveConv;

+static void

+UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){

+ const UChar *source = args->source;

+ const UChar *sourceLimit = args->sourceLimit;

+ unsigned char *target = (unsigned char *) args->target;

+ unsigned char *targetLimit = (unsigned char *) args->targetLimit;

+ int32_t* offsets = args->offsets;

+ uint32_t targetByteUnit = 0x0000;

+ UChar32 sourceChar = 0x0000;

+ UBool isTargetByteDBCS;

+ UBool oldIsTargetByteDBCS;

+ UConverterDataISO2022 *converterData;

+ UConverterSharedData* sharedData;

+ UBool useFallback;

+ int32_t length =0;

+ converterData=(UConverterDataISO2022*)args->converter->extraInfo;

+ /* if the version is 1 then the user is requesting

+ * conversion with ibm-25546; pass the arguments to

+ * the MBCS converter and return

+ */

+ if(converterData->version==1){

+ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);

+ return;

+ }

+ /* initialize data */

+ sharedData = converterData->currentConverter->sharedData;

+ useFallback = args->converter->useFallback;

+ isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;

+ oldIsTargetByteDBCS = isTargetByteDBCS;

+ isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus;

+ if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {

+ goto getTrail;

+ }

+ while(source < sourceLimit){

+ targetByteUnit = missingCharMarker;

+ if(target < (unsigned char*) args->targetLimit){

+ sourceChar = *source++;

+ /* do not convert SO/SI/ESC */

+ if(IS_2022_CONTROL(sourceChar)) {

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ args->converter->fromUChar32=sourceChar;

+ break;

+ }

+ length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);

+ if(length < 0) {

+ length = -length; /* fallback */

+ }

+ /* only DBCS or SBCS characters are expected*/

+ /* DB characters with high bit set to 1 are expected */

+ if( length > 2 || length==0 ||

+ (length == 1 && targetByteUnit > 0x7f) ||

+ (length == 2 &&

+ ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||

+ (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))

+ ) {

+ targetByteUnit=missingCharMarker;

+ }

+ if (targetByteUnit != missingCharMarker){

+ oldIsTargetByteDBCS = isTargetByteDBCS;

+ isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);

+ /* append the shift sequence */

+ if (oldIsTargetByteDBCS != isTargetByteDBCS ){

+ if (isTargetByteDBCS)

+ *target++ = UCNV_SO;

+ else

+ *target++ = UCNV_SI;

+ if(offsets)

+ *(offsets++) = (int32_t)(source - args->source-1);

+ }

+ /* write the targetUniChar to target */

+ if(targetByteUnit <= 0x00FF){

+ if( target < targetLimit){

+ *(target++) = (unsigned char) targetByteUnit;

+ if(offsets){

+ *(offsets++) = (int32_t)(source - args->source-1);

+ }

+ }else{

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);

+ *err = U_BUFFER_OVERFLOW_ERROR;

+ }

+ }else{

+ if(target < targetLimit){

+ *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);

+ if(offsets){

+ *(offsets++) = (int32_t)(source - args->source-1);

+ }

+ if(target < targetLimit){

+ *(target++) =(unsigned char) (targetByteUnit -0x80);

+ if(offsets){

+ *(offsets++) = (int32_t)(source - args->source-1);

+ }

+ }else{

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);

+ *err = U_BUFFER_OVERFLOW_ERROR;

+ }

+ }else{

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);

+ args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);

+ *err = U_BUFFER_OVERFLOW_ERROR;

+ }

+ else{

+ /* oops.. the code point is unassigned

+ * set the error and reason

+ */

+ /*check if the char is a First surrogate*/

+ if(UTF_IS_SURROGATE(sourceChar)) {

+ if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

+getTrail:

+ /*look ahead to find the trail surrogate*/

+ if(source < sourceLimit) {

+ /* test the following code unit */

+ UChar trail=(UChar) *source;

+ if(UTF_IS_SECOND_SURROGATE(trail)) {

+ source++;

+ sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

+ *err = U_INVALID_CHAR_FOUND;

+ /* convert this surrogate code point */

+ /* exit this condition tree */

+ } else {

+ /* this is an unmatched lead code unit (1st surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ }

+ } else {

+ /* no more input */

+ *err = U_ZERO_ERROR;

+ }

+ } else {

+ /* this is an unmatched trail code unit (2nd surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ }

+ } else {

+ /* callback(unassigned) for a BMP code point */

+ *err = U_INVALID_CHAR_FOUND;

+ }

+ args->converter->fromUChar32=sourceChar;

+ break;

+ }

+ } /* end if(myTargetIndex<myTargetLength) */

+ else{

+ *err =U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+ }/* end while(mySourceIndex<mySourceLength) */

+ /*

+ * the end of the input stream and detection of truncated input

+ * are handled by the framework, but for ISO-2022-KR conversion

+ * we need to be in ASCII mode at the very end

+ *

+ * conditions:

+ * successful

+ * not in ASCII mode

+ * end of input and no truncated input

+ */

+ if( U_SUCCESS(*err) &&

+ isTargetByteDBCS &&

+ args->flush && source>=sourceLimit && args->converter->fromUChar32==0

+ ) {

+ int32_t sourceIndex;

+ /* we are switching to ASCII */

+ isTargetByteDBCS=FALSE;

+ /* get the source index of the last input character */

+ /*

+ * TODO this would be simpler and more reliable if we used a pair

+ * of sourceIndex/prevSourceIndex like in ucnvmbcs.c

+ * so that we could simply use the prevSourceIndex here;

+ * this code gives an incorrect result for the rare case of an unmatched

+ * trail surrogate that is alone in the last buffer of the text stream

+ */

+ sourceIndex=(int32_t)(source-args->source);

+ if(sourceIndex>0) {

+ --sourceIndex;

+ if( U16_IS_TRAIL(args->source[sourceIndex]) &&

+ (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))

+ ) {

+ --sourceIndex;

+ }

+ } else {

+ sourceIndex=-1;

+ }

+ fromUWriteUInt8(

+ args->converter,

+ SHIFT_IN_STR, 1,

+ &target, (const char *)targetLimit,

+ &offsets, sourceIndex,

+ err);

+ }

+ /*save the state and return */

+ args->source = source;

+ args->target = (char*)target;

+ args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;

+/************************ To Unicode ***************************************
+ * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM:
+ * to-Unicode path that converts the input in runs. For each run it asks
+ * getEndOfBuffer_2022() where the run ends, lets the subconverter
+ * (myData->currentConverter) convert it via ucnv_MBCSToUnicodeWithOffsets(),
+ * and then calls changeState_2022() with ISO_2022_KR. Partial input bytes
+ * (toUBytes/toULength) and, on overflow, UCharErrorBuffer are copied between
+ * the public converter and the subconverter around each run. If myData->key
+ * is non-zero on entry, processing resumes directly at the escape handling.
+ */

+static void

+UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,

+ UErrorCode* err){

+ char const* sourceStart;

+ UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);

+ UConverterToUnicodeArgs subArgs;

+ int32_t minArgsSize;

+ /* set up the subconverter arguments */

+ if(args->size<sizeof(UConverterToUnicodeArgs)) { /* copy the smaller of args->size and the struct size */

+ minArgsSize = args->size;

+ } else {

+ minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);

+ }

+ uprv_memcpy(&subArgs, args, minArgsSize);

+ subArgs.size = (uint16_t)minArgsSize;

+ subArgs.converter = myData->currentConverter;

+ /* remember the original start of the input for offsets */

+ sourceStart = args->source;

+ if(myData->key != 0) {

+ /* continue with a partial escape sequence */

+ goto escape;

+ }

+ while(U_SUCCESS(*err) && args->source < args->sourceLimit) {

+ /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/

+ subArgs.source = args->source;

+ subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);

+ if(subArgs.source != subArgs.sourceLimit) { /* non-empty run: let the subconverter convert it */

+ /*

+ * get the current partial byte sequence

+ *

+ * it needs to be moved between the public and the subconverter

+ * so that the conversion framework, which only sees the public

+ * converter, can handle truncated and illegal input etc.

+ */

+ if(args->converter->toULength > 0) {

+ uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);

+ }

+ subArgs.converter->toULength = args->converter->toULength;

+ /*

+ * Convert up to the end of the input, or to before the next escape character.

+ * Does not handle conversion extensions because the preToU[] state etc.

+ * is not copied.

+ */

+ ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);

+ if(args->offsets != NULL && sourceStart != args->source) {

+ /* update offsets to base them on the actual start of the input */

+ int32_t *offsets = args->offsets;

+ UChar *target = args->target;

+ int32_t delta = (int32_t)(args->source - sourceStart);

+ while(target < subArgs.target) { /* one offset per UChar written in this run */

+ if(*offsets >= 0) { /* negative offsets are left unchanged */

+ *offsets += delta;

+ }

+ ++offsets;

+ ++target;

+ }

+ args->source = subArgs.source; /* publish the subconverter's progress to the caller's args */

+ args->target = subArgs.target;

+ args->offsets = subArgs.offsets;

+ /* copy input/error/overflow buffers */

+ if(subArgs.converter->toULength > 0) {

+ uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);

+ }

+ args->converter->toULength = subArgs.converter->toULength;

+ if(*err == U_BUFFER_OVERFLOW_ERROR) {

+ if(subArgs.converter->UCharErrorBufferLength > 0) {

+ uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,

+ subArgs.converter->UCharErrorBufferLength);

+ }

+ args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;

+ subArgs.converter->UCharErrorBufferLength = 0; /* the overflow now belongs to the public converter */

+ }

+ if (U_FAILURE(*err) || (args->source == args->sourceLimit)) { /* stop on error or when all input is consumed */

+ return;

+ }

+escape:

+ changeState_2022(args->converter,

+ &(args->source),

+ args->sourceLimit,

+ ISO_2022_KR,

+ err);

+ }

+static void

+UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

+ UErrorCode* err){

+ char tempBuf[2];

+ const char *mySource = ( char *) args->source;

+ UChar *myTarget = args->target;

+ const char *mySourceLimit = args->sourceLimit;

+ UChar32 targetUniChar = 0x0000;

+ UChar mySourceChar = 0x0000;

+ UConverterDataISO2022* myData;

+ UConverterSharedData* sharedData ;

+ UBool useFallback;

+ myData=(UConverterDataISO2022*)(args->converter->extraInfo);

+ if(myData->version==1){

+ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);

+ return;

+ }

+ /* initialize state */

+ sharedData = myData->currentConverter->sharedData;

+ useFallback = args->converter->useFallback;

+ if(myData->key != 0) {

+ /* continue with a partial escape sequence */

+ goto escape;

+ } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {

+ /* continue with a partial double-byte character */

+ mySourceChar = args->converter->toUBytes[0];

+ args->converter->toULength = 0;

+ goto getTrailByte;

+ }

+ while(mySource< mySourceLimit){

+ if(myTarget < args->targetLimit){

+ mySourceChar= (unsigned char) *mySource++;

+ if(mySourceChar==UCNV_SI){

+ myData->toU2022State.g = 0;

+ if (myData->isEmptySegment) {

+ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ args->converter->toUCallbackReason = UCNV_IRREGULAR;

+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;

+ args->converter->toULength = 1;

+ args->target = myTarget;

+ args->source = mySource;

+ return;

+ }

+ /*consume the source */

+ continue;

+ }else if(mySourceChar==UCNV_SO){

+ myData->toU2022State.g = 1;

+ myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */

+ /*consume the source */

+ continue;

+ }else if(mySourceChar==ESC_2022){

+ mySource--;

+escape:

+ myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */

+ changeState_2022(args->converter,&(mySource),

+ mySourceLimit, ISO_2022_KR, err);

+ if(U_FAILURE(*err)){

+ args->target = myTarget;

+ args->source = mySource;

+ return;

+ }

+ continue;

+ }

+ myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */

+ if(myData->toU2022State.g == 1) {

+ if(mySource < mySourceLimit) {

+ int leadIsOk, trailIsOk;

+ uint8_t trailByte;

+getTrailByte:

+ targetUniChar = missingCharMarker;

+ trailByte = (uint8_t)*mySource;

+ /*

+ * Ticket 5691: consistent illegal sequences:

+ * - We include at least the first byte in the illegal sequence.

+ * - If any of the non-initial bytes could be the start of a character,

+ * we stop the illegal sequence before the first one of those.

+ *

+ * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is

+ * an ESC/SO/SI, we report only the first byte as the illegal sequence.

+ * Otherwise we convert or report the pair of bytes.

+ */

+ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);

+ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);

+ if (leadIsOk && trailIsOk) {

+ ++mySource;

+ tempBuf[0] = (char)(mySourceChar + 0x80);

+ tempBuf[1] = (char)(trailByte + 0x80);

+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);

+ mySourceChar = (mySourceChar << 8) | trailByte;

+ } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {

+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */

+ ++mySource;

+ /* add another bit so that the code below writes 2 bytes in case of error */

+ mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;

+ }

+ } else {

+ args->converter->toUBytes[0] = (uint8_t)mySourceChar;

+ args->converter->toULength = 1;

+ break;

+ }

+ else if(mySourceChar <= 0x7f) {

+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);

+ } else {

+ targetUniChar = 0xffff;

+ }

+ if(targetUniChar < 0xfffe){

+ if(args->offsets) {

+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

+ }

+ *(myTarget++)=(UChar)targetUniChar;

+ }

+ else {

+ /* Call the callback function*/

+ toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);

+ break;

+ }

+ else{

+ *err =U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+ args->target = myTarget;

+ args->source = mySource;

+/*************************** END ISO2022-KR *********************************/

+/*************************** ISO-2022-CN *********************************

+* Rules for ISO-2022-CN Encoding:

+* i) The designator sequence must appear once on a line before any instance

+* of character set it designates.

+* ii) If two lines contain characters from the same character set, both lines

+* must include the designator sequence.

+* iii) Once the designator sequence is known, a shifting sequence has to be found

+* to invoke the shifting

+* iv) All lines start in ASCII and end in ASCII.

+* v) Four shifting sequences are employed for this purpose:

+* Sequence ASCII Eq Charsets

+* ---------- ------- ---------

+* SI <SI> US-ASCII

+* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165

+* SS2 <ESC>N CNS-11643-1992 Plane 2

+* SS3 <ESC>O CNS-11643-1992 Planes 3-7

+* vi)

+* SOdesignator : ESC "$" ")" finalchar_for_SO

+* SS2designator : ESC "$" "*" finalchar_for_SS2

+* SS3designator : ESC "$" "+" finalchar_for_SS3

+* ESC $ ) A Indicates the bytes following SO are Chinese

+* characters as defined in GB 2312-80, until

+* another SOdesignation appears

+* ESC $ ) E Indicates the bytes following SO are as defined

+* in ISO-IR-165 (for details, see section 2.1),

+* until another SOdesignation appears

+* ESC $ ) G Indicates the bytes following SO are as defined

+* in CNS 11643-plane-1, until another

+* SOdesignation appears

+* ESC $ * H Indicates the two bytes immediately following

+* SS2 is a Chinese character as defined in CNS

+* 11643-plane-2, until another SS2designation

+* appears

+* (Meaning <ESC>N must precede every 2 byte

+* sequence.)

+* ESC $ + I Indicates the immediate two bytes following SS3

+* is a Chinese character as defined in CNS

+* 11643-plane-3, until another SS3designation

+* appears

+* (Meaning <ESC>O must precede every 2 byte

+* sequence.)

+* ESC $ + J Indicates the immediate two bytes following SS3

+* is a Chinese character as defined in CNS

+* 11643-plane-4, until another SS3designation

+* appears

+* (In English: <ESC>O must precede every 2 byte

+* sequence.)

+* ESC $ + K Indicates the immediate two bytes following SS3

+* is a Chinese character as defined in CNS

+* 11643-plane-5, until another SS3designation

+* appears

+* ESC $ + L Indicates the immediate two bytes following SS3

+* is a Chinese character as defined in CNS

+* 11643-plane-6, until another SS3designation

+* appears

+* ESC $ + M Indicates the immediate two bytes following SS3

+* is a Chinese character as defined in CNS

+* 11643-plane-7, until another SS3designation

+* appears

+* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and

+* has its own designation information before any Chinese characters

+* appear

+*/

+/* The following are defined this way to make the strings truly readonly.
+ * Each literal is a 4-byte designation escape sequence; the trailing comment
+ * on each line spells the bytes out as characters (0x1B is ESC). */

+static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; /* ESC $ ) A */

+static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; /* ESC $ ) E */

+static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */

+static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */

+static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */

+static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */

+static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */

+static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */

+static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */

+/********************** ISO2022-CN Data **************************
+ * escSeqCharsCN: sequences copied into the output buffer by the ISO-2022-CN
+ * from-Unicode code when it designates a charset. That code reads
+ * escSeqCharsCN[cs] when cs < CNS_11643 and
+ * escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)] otherwise, so the entry order
+ * has to stay in step with the charset state values.
+ * NOTE(review): those enum values are declared outside this section - confirm
+ * the ordering against them before reordering or adding entries.
+ */

+static const char* const escSeqCharsCN[10] ={

+ SHIFT_IN_STR, /* ASCII */

+ GB_2312_80_STR, /* ESC $ ) A */

+ ISO_IR_165_STR, /* ESC $ ) E */

+ CNS_11643_1992_Plane_1_STR, /* ESC $ ) G */

+ CNS_11643_1992_Plane_2_STR, /* ESC $ * H */

+ CNS_11643_1992_Plane_3_STR, /* ESC $ + I */

+ CNS_11643_1992_Plane_4_STR, /* ESC $ + J */

+ CNS_11643_1992_Plane_5_STR, /* ESC $ + K */

+ CNS_11643_1992_Plane_6_STR, /* ESC $ + L */

+ CNS_11643_1992_Plane_7_STR /* ESC $ + M */

+};

+static void

+UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){

+ UConverter *cnv = args->converter;

+ UConverterDataISO2022 *converterData;

+ ISO2022State *pFromU2022State;

+ uint8_t *target = (uint8_t *) args->target;

+ const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;

+ const UChar* source = args->source;

+ const UChar* sourceLimit = args->sourceLimit;

+ int32_t* offsets = args->offsets;

+ UChar32 sourceChar;

+ char buffer[8];

+ int32_t len;

+ int8_t choices[3];

+ int32_t choiceCount;

+ uint32_t targetValue = 0;

+ UBool useFallback;

+ /* set up the state */

+ converterData = (UConverterDataISO2022*)cnv->extraInfo;

+ pFromU2022State = &converterData->fromU2022State;

+ choiceCount = 0;

+ /* check if the last codepoint of previous buffer was a lead surrogate*/

+ if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {

+ goto getTrail;

+ }

+ while( source < sourceLimit){

+ if(target < targetLimit){

+ sourceChar = *(source++);

+ /*check if the char is a First surrogate*/

+ if(UTF_IS_SURROGATE(sourceChar)) {

+ if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

+getTrail:

+ /*look ahead to find the trail surrogate*/

+ if(source < sourceLimit) {

+ /* test the following code unit */

+ UChar trail=(UChar) *source;

+ if(UTF_IS_SECOND_SURROGATE(trail)) {

+ source++;

+ sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

+ cnv->fromUChar32=0x00;

+ /* convert this supplementary code point */

+ /* exit this condition tree */

+ } else {

+ /* this is an unmatched lead code unit (1st surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ } else {

+ /* no more input */

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ } else {

+ /* this is an unmatched trail code unit (2nd surrogate) */

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ /* do the conversion */

+ if(sourceChar <= 0x007f ){

+ /* do not convert SO/SI/ESC */

+ if(IS_2022_CONTROL(sourceChar)) {

+ /* callback(illegal) */

+ *err=U_ILLEGAL_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ /* US-ASCII */

+ if(pFromU2022State->g == 0) {

+ buffer[0] = (char)sourceChar;

+ len = 1;

+ } else {

+ buffer[0] = UCNV_SI;

+ buffer[1] = (char)sourceChar;

+ len = 2;

+ pFromU2022State->g = 0;

+ choiceCount = 0;

+ }

+ if(sourceChar == CR || sourceChar == LF) {

+ /* reset the state at the end of a line */

+ uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));

+ choiceCount = 0;

+ }

+ else{

+ /* convert U+0080..U+10ffff */

+ int32_t i;

+ int8_t cs, g;

+ if(choiceCount == 0) {

+ /* try the current SO/G1 converter first */

+ choices[0] = pFromU2022State->cs[1];

+ /* default to GB2312_1 if none is designated yet */

+ if(choices[0] == 0) {

+ choices[0] = GB2312_1;

+ }

+ if(converterData->version == 0) {

+ /* ISO-2022-CN */

+ /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */

+ if(choices[0] == GB2312_1) {

+ choices[1] = (int8_t)CNS_11643_1;

+ } else {

+ choices[1] = (int8_t)GB2312_1;

+ }

+ choiceCount = 2;

+ } else if (converterData->version == 1) {

+ /* ISO-2022-CN-EXT */

+ /* try one of the other converters */

+ switch(choices[0]) {

+ case GB2312_1:

+ choices[1] = (int8_t)CNS_11643_1;

+ choices[2] = (int8_t)ISO_IR_165;

+ break;

+ case ISO_IR_165:

+ choices[1] = (int8_t)GB2312_1;

+ choices[2] = (int8_t)CNS_11643_1;

+ break;

+ default: /* CNS_11643_x */

+ choices[1] = (int8_t)GB2312_1;

+ choices[2] = (int8_t)ISO_IR_165;

+ break;

+ }

+ choiceCount = 3;

+ } else {

+ choices[0] = (int8_t)CNS_11643_1;

+ choices[1] = (int8_t)GB2312_1;

+ }

+ cs = g = 0;

+ /*

+ * len==0: no mapping found yet

+ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks

+ * len>0: found a roundtrip result, done

+ */

+ len = 0;

+ /*

+ * We will turn off useFallback after finding a fallback,

+ * but we still get fallbacks from PUA code points as usual.

+ * Therefore, we will also need to check that we don't overwrite

+ * an early fallback with a later one.

+ */

+ useFallback = cnv->useFallback;

+ for(i = 0; i < choiceCount && len <= 0; ++i) {

+ int8_t cs0 = choices[i];

+ if(cs0 > 0) {

+ uint32_t value;

+ int32_t len2;

+ if(cs0 >= CNS_11643_0) {

+ len2 = MBCS_FROM_UCHAR32_ISO2022(

+ converterData->myConverterArray[CNS_11643],

+ sourceChar,

+ &value,

+ useFallback,

+ MBCS_OUTPUT_3);

+ if(len2 == 3 || (len2 == -3 && len == 0)) {

+ targetValue = value;

+ cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);

+ if(len2 >= 0) {

+ len = 2;

+ } else {

+ len = -2;

+ useFallback = FALSE;

+ }

+ if(cs == CNS_11643_1) {

+ g = 1;

+ } else if(cs == CNS_11643_2) {

+ g = 2;

+ } else /* plane 3..7 */ if(converterData->version == 1) {

+ g = 3;

+ } else {

+ /* ISO-2022-CN (without -EXT) does not support plane 3..7 */

+ len = 0;

+ }

+ } else {

+ /* GB2312_1 or ISO-IR-165 */

+ len2 = MBCS_FROM_UCHAR32_ISO2022(

+ converterData->myConverterArray[cs0],

+ sourceChar,

+ &value,

+ useFallback,

+ MBCS_OUTPUT_2);

+ if(len2 == 2 || (len2 == -2 && len == 0)) {

+ targetValue = value;

+ len = len2;

+ cs = cs0;

+ g = 1;

+ useFallback = FALSE;

+ }

+ if(len != 0) {

+ len = 0; /* count output bytes; it must have been abs(len) == 2 */

+ /* write the designation sequence if necessary */

+ if(cs != pFromU2022State->cs[g]) {

+ if(cs < CNS_11643) {

+ uprv_memcpy(buffer, escSeqCharsCN[cs], 4);

+ } else {

+ uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);

+ }

+ len = 4;

+ pFromU2022State->cs[g] = cs;

+ if(g == 1) {

+ /* changing the SO/G1 charset invalidates the choices[] */

+ choiceCount = 0;

+ }

+ /* write the shift sequence if necessary */

+ if(g != pFromU2022State->g) {

+ switch(g) {

+ case 1:

+ buffer[len++] = UCNV_SO;

+ /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */

+ pFromU2022State->g = 1;

+ break;

+ case 2:

+ buffer[len++] = 0x1b;

+ buffer[len++] = 0x4e;

+ break;

+ default: /* case 3 */

+ buffer[len++] = 0x1b;

+ buffer[len++] = 0x4f;

+ break;

+ }

+ /* write the two output bytes */

+ buffer[len++] = (char)(targetValue >> 8);

+ buffer[len++] = (char)targetValue;

+ } else {

+ /* if we cannot find the character after checking all codepages

+ * then this is an error

+ */

+ *err = U_INVALID_CHAR_FOUND;

+ cnv->fromUChar32=sourceChar;

+ break;

+ }

+ /* output len>0 bytes in buffer[] */

+ if(len == 1) {

+ *target++ = buffer[0];

+ if(offsets) {

+ *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */

+ }

+ } else if(len == 2 && (target + 2) <= targetLimit) {

+ *target++ = buffer[0];

+ *target++ = buffer[1];

+ if(offsets) {

+ int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));

+ *offsets++ = sourceIndex;

+ }

+ } else {

+ fromUWriteUInt8(

+ cnv,

+ buffer, len,

+ &target, (const char *)targetLimit,

+ &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),

+ err);

+ if(U_FAILURE(*err)) {

+ break;

+ }

+ } /* end if(myTargetIndex<myTargetLength) */

+ else{

+ *err =U_BUFFER_OVERFLOW_ERROR;

+ break;

+ }

+ }/* end while(mySourceIndex<mySourceLength) */

+ /*

+ * the end of the input stream and detection of truncated input

+ * are handled by the framework, but for ISO-2022-CN conversion

+ * we need to be in ASCII mode at the very end

+ *

+ * conditions:

+ * successful

+ * not in ASCII mode

+ * end of input and no truncated input

+ */

+ if( U_SUCCESS(*err) &&

+ pFromU2022State->g!=0 &&

+ args->flush && source>=sourceLimit && cnv->fromUChar32==0

+ ) {

+ int32_t sourceIndex;

+ /* we are switching to ASCII */

+ pFromU2022State->g=0;

+ /* get the source index of the last input character */

+ /*

+ * TODO this would be simpler and more reliable if we used a pair

+ * of sourceIndex/prevSourceIndex like in ucnvmbcs.c

+ * so that we could simply use the prevSourceIndex here;

+ * this code gives an incorrect result for the rare case of an unmatched

+ * trail surrogate that is alone in the last buffer of the text stream

+ */

+ sourceIndex=(int32_t)(source-args->source);

+ if(sourceIndex>0) {

+ --sourceIndex;

+ if( U16_IS_TRAIL(args->source[sourceIndex]) &&

+ (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))

+ ) {

+ --sourceIndex;

+ }

+ } else {

+ sourceIndex=-1;

+ }

+ fromUWriteUInt8(

+ cnv,

+ SHIFT_IN_STR, 1,

+ &target, (const char *)targetLimit,

+ &offsets, sourceIndex,

+ err);

+ }

+ /*save the state and return */

+ args->source = source;

+ args->target = (char*)target;

+static void

+UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

+ UErrorCode* err){ /*
+ * ISO-2022-CN to Unicode conversion with optional offsets.
+ *
+ * Reads bytes from args->source up to args->sourceLimit and writes UChars
+ * to args->target up to args->targetLimit; when args->offsets is set, the
+ * source index of each output UChar is recorded there.
+ *
+ * Control bytes handled in the switch below:
+ *   SI      - selects g=0 (single-byte output path).
+ *   SO      - selects g=1, but only if cs[1] has been designated.
+ *   ESC     - handed to changeState_2022() with ISO_2022_CN.
+ *   CR, LF  - zero the whole toUnicode ISO2022State, then are converted
+ *             like any other byte.
+ * An SO that is followed directly by SI or by a completed escape sequence
+ * (isEmptySegment still TRUE) is reported as U_ILLEGAL_ESCAPE_SEQUENCE with
+ * callback reason UCNV_IRREGULAR.
+ *
+ * The function is resumable: a pending escape sequence (myData->key != 0)
+ * or a stored lead byte (toULength == 1) is continued on entry.
+ * On every exit path visible here the current target/source positions are
+ * written back into args.
+ */

+ char tempBuf[3]; /* [plane byte,] lead byte, trail byte handed to the sub-converter lookup */

+ const char *mySource = (char *) args->source;

+ UChar *myTarget = args->target;

+ const char *mySourceLimit = args->sourceLimit;

+ uint32_t targetUniChar = 0x0000;

+ uint32_t mySourceChar = 0x0000;

+ UConverterDataISO2022* myData;

+ ISO2022State *pToU2022State;

+ myData=(UConverterDataISO2022*)(args->converter->extraInfo);

+ pToU2022State = &myData->toU2022State;

+ if(myData->key != 0) {

+ /* continue with a partial escape sequence */

+ goto escape;

+ } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {

+ /* continue with a partial double-byte character: the lead byte was saved in toUBytes[0] */

+ mySourceChar = args->converter->toUBytes[0];

+ args->converter->toULength = 0;

+ targetUniChar = missingCharMarker;

+ goto getTrailByte;

+ }

+ while(mySource < mySourceLimit){

+ targetUniChar =missingCharMarker;

+ if(myTarget < args->targetLimit){

+ mySourceChar= (unsigned char) *mySource++;

+ switch(mySourceChar){

+ case UCNV_SI:

+ pToU2022State->g=0;

+ if (myData->isEmptySegment) {

+ myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ args->converter->toUCallbackReason = UCNV_IRREGULAR;

+ args->converter->toUBytes[0] = mySourceChar;

+ args->converter->toULength = 1;

+ args->target = myTarget;

+ args->source = mySource;

+ return;

+ }

+ continue;

+ case UCNV_SO:

+ if(pToU2022State->cs[1] != 0) {

+ pToU2022State->g=1;

+ myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */

+ continue;

+ } else {

+ /* illegal to have SO before a matching designator */

+ myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */

+ break;

+ }

+ case ESC_2022:

+ mySource--; /* step back so that changeState_2022() sees the ESC byte itself */

+escape:

+ {

+ const char * mySourceBefore = mySource;

+ int8_t toULengthBefore = args->converter->toULength;

+ changeState_2022(args->converter,&(mySource),

+ mySourceLimit, ISO_2022_CN,err);

+ /* After SO there must be at least one character before a designator (designator error handled separately) */

+ if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {

+ *err = U_ILLEGAL_ESCAPE_SEQUENCE;

+ args->converter->toUCallbackReason = UCNV_IRREGULAR;

+ args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); /* previously pending bytes plus the bytes consumed by this call */

+ }

+ /* invalid or illegal escape sequence */

+ if(U_FAILURE(*err)){

+ args->target = myTarget;

+ args->source = mySource;

+ myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */

+ return;

+ }

+ continue;

+ /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */

+ case CR:

+ /*falls through*/

+ case LF:

+ uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); /* zeroes every field of the toUnicode state */

+ /* falls through */

+ default:

+ /* convert one or two bytes */

+ myData->isEmptySegment = FALSE;

+ if(pToU2022State->g != 0) {

+ if(mySource < mySourceLimit) {

+ UConverterSharedData *cnv;

+ StateEnum tempState;

+ int32_t tempBufLen;

+ int leadIsOk, trailIsOk;

+ uint8_t trailByte;

+getTrailByte:

+ trailByte = (uint8_t)*mySource; /* peek only; mySource is advanced below once the pair is accepted or rejected as a pair */

+ /*

+ * Ticket 5691: consistent illegal sequences:

+ * - We include at least the first byte in the illegal sequence.

+ * - If any of the non-initial bytes could be the start of a character,

+ * we stop the illegal sequence before the first one of those.

+ *

+ * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is

+ * an ESC/SO/SI, we report only the first byte as the illegal sequence.

+ * Otherwise we convert or report the pair of bytes.

+ */

+ leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); /* unsigned wrap-around turns this into one range test for 0x21..0x7e */

+ trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);

+ if (leadIsOk && trailIsOk) {

+ ++mySource;

+ tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];

+ if(tempState >= CNS_11643_0) {

+ cnv = myData->myConverterArray[CNS_11643];

+ tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); /* extra first byte derived from the designated CNS state; makes the lookup a 3-byte one */

+ tempBuf[1] = (char) (mySourceChar);

+ tempBuf[2] = (char) trailByte;

+ tempBufLen = 3;

+ }else{

+ cnv = myData->myConverterArray[tempState];

+ tempBuf[0] = (char) (mySourceChar);

+ tempBuf[1] = (char) trailByte;

+ tempBufLen = 2;

+ }

+ targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);

+ mySourceChar = (mySourceChar << 8) | trailByte;

+ } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {

+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */

+ ++mySource;

+ /* add another bit so that the code below writes 2 bytes in case of error */

+ mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;

+ }

+ if(pToU2022State->g>=2) {

+ /* return from a single-shift state to the previous one */

+ pToU2022State->g=pToU2022State->prevG;

+ }

+ } else {

+ args->converter->toUBytes[0] = (uint8_t)mySourceChar; /* input ended after the lead byte: save it so the next call can resume */

+ args->converter->toULength = 1;

+ goto endloop;

+ }

+ else{

+ if(mySourceChar <= 0x7f) {

+ targetUniChar = (UChar) mySourceChar; /* g==0: bytes up to 0x7f map to the same code point */

+ }

+ break;

+ }

+ if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){

+ if(args->offsets){

+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); /* index of the first source byte of this character */

+ }

+ *(myTarget++)=(UChar)targetUniChar;

+ }

+ else if(targetUniChar > missingCharMarker){

+ /* disassemble the surrogate pair and write to output*/

+ targetUniChar-=0x0010000;

+ *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));

+ if(args->offsets){

+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

+ }

+ ++myTarget;

+ if(myTarget< args->targetLimit){

+ *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

+ if(args->offsets){

+ args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

+ }

+ ++myTarget;

+ }else{

+ args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= /* no room for the trail surrogate: park it in the converter's UChar error buffer */

+ (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

+ }

+ else{

+ /* Call the callback function*/

+ toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);

+ break;

+ }

+ else{

+ *err =U_BUFFER_OVERFLOW_ERROR; /* target is full while source bytes remain */

+ break;

+ }

+endloop:

+ args->target = myTarget; /* hand the final positions back to the caller */

+ args->source = mySource;

+static void

+_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { /*
+ * Writes the converter's substitution character, preceded by whatever
+ * shift or designation bytes are needed so that it can be emitted.
+ *
+ * The variant is chosen by the first letter of the converter locale:
+ *   'j' - emit SI if in G1, and ESC ( B if cs[0] is neither ASCII nor
+ *         JISX201, then one subchar byte.
+ *   'c' - emit SI if g != 0, then one subchar byte.
+ *   'k' - version 0: emit SI or SO depending on the subchar length and on
+ *         fromUnicodeStatus, then the 1 or 2 subchar bytes;
+ *         other versions: let the current sub-converter write the subchar
+ *         through ucnv_cbFromUWriteSub() and return directly.
+ * For every path that does not return early, the assembled bytes are
+ * written with ucnv_cbFromUWriteBytes() using offsetIndex.
+ */

+ UConverter *cnv = args->converter;

+ UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;

+ ISO2022State *pFromU2022State=&myConverterData->fromU2022State;

+ char *p, *subchar;

+ char buffer[8]; /* staging area; the longest sequence built below is 5 bytes (SI + ESC ( B + subchar) */

+ int32_t length;

+ subchar=(char *)cnv->subChars;

+ length=cnv->subCharLen; /* assume length==1 for most variants */

+ p = buffer;

+ switch(myConverterData->locale[0]){

+ case 'j':

+ {

+ int8_t cs;

+ if(pFromU2022State->g == 1) {

+ /* JIS7: switch from G1 to G0 */

+ pFromU2022State->g = 0;

+ *p++ = UCNV_SI;

+ }

+ cs = pFromU2022State->cs[0];

+ if(cs != ASCII && cs != JISX201) {

+ /* not in ASCII or JIS X 0201: switch to ASCII */

+ pFromU2022State->cs[0] = (int8_t)ASCII;

+ *p++ = '\x1b'; /* the three bytes are ESC ( B */

+ *p++ = '\x28';

+ *p++ = '\x42';

+ }

+ *p++ = subchar[0];

+ break;

+ }

+ case 'c':

+ if(pFromU2022State->g != 0) {

+ /* not in ASCII mode: switch to ASCII */

+ pFromU2022State->g = 0;

+ *p++ = UCNV_SI;

+ }

+ *p++ = subchar[0];

+ break;

+ case 'k':

+ if(myConverterData->version == 0) {

+ if(length == 1) {

+ if((UBool)args->converter->fromUnicodeStatus) {

+ /* in DBCS mode: switch to SBCS */

+ args->converter->fromUnicodeStatus = 0;

+ *p++ = UCNV_SI;

+ }

+ *p++ = subchar[0];

+ } else /* length == 2*/ {

+ if(!(UBool)args->converter->fromUnicodeStatus) {

+ /* in SBCS mode: switch to DBCS */

+ args->converter->fromUnicodeStatus = 1;

+ *p++ = UCNV_SO;

+ }

+ *p++ = subchar[0];

+ *p++ = subchar[1];

+ }

+ break;

+ } else {

+ /* save the subconverter's substitution string */

+ uint8_t *currentSubChars = myConverterData->currentConverter->subChars;

+ int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;

+ /* set our substitution string into the subconverter */

+ myConverterData->currentConverter->subChars = (uint8_t *)subchar;

+ myConverterData->currentConverter->subCharLen = (int8_t)length;

+ /* let the subconverter write the subchar, set/retrieve fromUChar32 state */

+ args->converter = myConverterData->currentConverter; /* temporarily redirect args to the sub-converter; restored right after the call */

+ myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;

+ ucnv_cbFromUWriteSub(args, 0, err);

+ cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;

+ args->converter = cnv;

+ /* restore the subconverter's substitution string */

+ myConverterData->currentConverter->subChars = currentSubChars;

+ myConverterData->currentConverter->subCharLen = currentSubCharLen;

+ if(*err == U_BUFFER_OVERFLOW_ERROR) { /* move bytes that overflowed into the sub-converter's error buffer over to the outer converter */

+ if(myConverterData->currentConverter->charErrorBufferLength > 0) {

+ uprv_memcpy(

+ cnv->charErrorBuffer,

+ myConverterData->currentConverter->charErrorBuffer,

+ myConverterData->currentConverter->charErrorBufferLength);

+ }

+ cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;

+ myConverterData->currentConverter->charErrorBufferLength = 0;

+ }

+ return;

+ }

+ default:

+ /* not expected; nothing is staged, so the write below is called with a length of 0 */

+ break;

+ }

+ ucnv_cbFromUWriteBytes(args,

+ buffer, (int32_t)(p - buffer),

+ offsetIndex, err);

+/*

+ * Structure for cloning an ISO 2022 converter into a single memory block.

+ * ucnv_safeClone() of the converter will align the entire cloneStruct,

+ * and then ucnv_safeClone() of the sub-converter may additionally align

+ * currentConverter inside the cloneStruct, for which we need the deadSpace

+ * after currentConverter.

+ * This is because UAlignedMemory may be larger than the actually

+ * necessary alignment size for the platform.

+ * The other cloneStruct fields will not be moved around,

+ * and are aligned properly with cloneStruct's alignment.

+ */

+struct cloneStruct

+ UConverter cnv; /* the cloned outer converter; its address is what the clone function returns */

+ UConverter currentConverter; /* storage for the clone of the current sub-converter */

+ UAlignedMemory deadSpace; /* padding that lets the sub-converter clone be re-aligned inside this struct */

+ UConverterDataISO2022 mydata; /* copy of the ISO-2022 extra data; cnv.extraInfo is pointed at it */

+};

+static UConverter *

+_ISO_2022_SafeClone(

+ const UConverter *cnv,

+ void *stackBuffer,

+ int32_t *pBufferSize,

+ UErrorCode *status) /*
+ * Clone hook for the ISO-2022 converters.
+ *
+ * cnv         - the converter being cloned; its extraInfo is read as
+ *               UConverterDataISO2022.
+ * stackBuffer - memory for the clone, used as a struct cloneStruct.
+ * pBufferSize - if it points to 0 this is a preflight call: the required
+ *               size is stored there and NULL is returned.
+ * status      - receives the error code from cloning the sub-converter.
+ *
+ * Returns the address of the cnv member inside stackBuffer, or NULL for a
+ * preflight call or when cloning the current sub-converter fails.
+ *
+ * NOTE(review): *pBufferSize is not compared against sizeof(struct
+ * cloneStruct) here; presumably the caller guarantees the buffer is large
+ * enough - confirm against ucnv_safeClone().
+ */

+ struct cloneStruct * localClone;

+ UConverterDataISO2022 *cnvData;

+ int32_t i, size;

+ if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */

+ *pBufferSize = (int32_t)sizeof(struct cloneStruct);

+ return NULL;

+ }

+ cnvData = (UConverterDataISO2022 *)cnv->extraInfo;

+ localClone = (struct cloneStruct *)stackBuffer;

+ /* ucnv.c/ucnv_safeClone() copied the main UConverter already */

+ uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));

+ localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */

+ localClone->cnv.isExtraLocal = TRUE;

+ /* clone the current sub-converter into the clone's own storage, if there is one */

+ if(cnvData->currentConverter != NULL) {

+ size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */

+ localClone->mydata.currentConverter =

+ ucnv_safeClone(cnvData->currentConverter,

+ &localClone->currentConverter,

+ &size, status);

+ if(U_FAILURE(*status)) {

+ return NULL;

+ }

+ for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) { /* the shared sub-converter data is not copied; each non-NULL entry gets one more reference */

+ if(cnvData->myConverterArray[i] != NULL) {

+ ucnv_incrementRefCount(cnvData->myConverterArray[i]);

+ }

+ return &localClone->cnv;

+static void

+_ISO_2022_GetUnicodeSet(const UConverter *cnv,

+ const USetAdder *sa,

+ UConverterUnicodeSet which,

+ UErrorCode *pErrorCode) /*
+ * Adds to the set behind sa the code points that this ISO-2022 converter
+ * can convert.
+ *
+ * cnv        - the converter; its extraInfo is read as UConverterDataISO2022.
+ * sa         - set adder; its add/addRange/remove/removeRange callbacks are
+ *              applied to sa->set.
+ * which      - selects the kind of set; passed on to the sub-converters and
+ *              compared against UCNV_ROUNDTRIP_AND_FALLBACK_SET below.
+ * pErrorCode - nothing is done if it already holds a failure.
+ *
+ * Steps: add the hardcoded ranges for the locale variant, add the sets of
+ * all sub-converters in myConverterArray[] (filtered where needed), then
+ * remove SO, SI, ESC and the C1 controls.
+ */

+ int32_t i;

+ UConverterDataISO2022* cnvData;

+ if (U_FAILURE(*pErrorCode)) {

+ return;

+ }

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ if (cnv->sharedData == &_ISO2022Data) {

+ /* We use UTF-8 in this case: add all code points except the surrogate range d800..dfff */

+ sa->addRange(sa->set, 0, 0xd7FF);

+ sa->addRange(sa->set, 0xE000, 0x10FFFF);

+ return;

+ }

+#endif

+ cnvData = (UConverterDataISO2022*)cnv->extraInfo;

+ /* open a set and initialize it with code points that are algorithmically round-tripped */

+ switch(cnvData->locale[0]){

+ case 'j':

+ /* include JIS X 0201 which is hardcoded */

+ sa->add(sa->set, 0xa5);

+ sa->add(sa->set, 0x203e);

+ if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {

+ /* include Latin-1 for some variants of JP */

+ sa->addRange(sa->set, 0, 0xff);

+ } else {

+ /* include ASCII for JP */

+ sa->addRange(sa->set, 0, 0x7f);

+ }

+ if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

+ /*

+ * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0

+ * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)

+ * use half-width Katakana.

+ * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)

+ * half-width Katakana via the ESC ( I sequence.

+ * However, we only emit (fromUnicode) half-width Katakana according to the

+ * definition of each variant.

+ *

+ * When including fallbacks,

+ * we need to include half-width Katakana Unicode code points for all JP variants because

+ * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).

+ */

+ /* include half-width Katakana for JP */

+ sa->addRange(sa->set, HWKANA_START, HWKANA_END);

+ }

+ break;

+ case 'c':

+ case 'z':

+ /* include ASCII for CN */

+ sa->addRange(sa->set, 0, 0x7f);

+ break;

+ case 'k':

+ /* there is only one converter for KR, and it is not in the myConverterArray[] */

+ cnvData->currentConverter->sharedData->impl->getUnicodeSet(

+ cnvData->currentConverter, sa, which, pErrorCode);

+ /* the loop over myConverterArray[] will simply not find another converter */

+ break;

+ default:

+ break;

+ }

+#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */

+ if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&

+ cnvData->version==0 && i==CNS_11643

+ ) {

+ /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */

+ ucnv_MBCSGetUnicodeSetForBytes(

+ cnvData->myConverterArray[i],

+ sa, UCNV_ROUNDTRIP_SET,

+ 0, 0x81, 0x82,

+ pErrorCode);

+ }

+#endif

+ for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { /* add the set of every loaded sub-converter, choosing a filter per slot */

+ UConverterSetFilter filter;

+ if(cnvData->myConverterArray[i]!=NULL) {

+ if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&

+ cnvData->version==0 && i==CNS_11643

+ ) {

+ /*

+ * Version-specific for CN:

+ * CN version 0 does not map CNS planes 3..7 although

+ * they are all available in the CNS conversion table;

+ * CN version 1 (-EXT) does map them all.

+ * The two versions create different Unicode sets.

+ */

+ filter=UCNV_SET_FILTER_2022_CN;

+ } else if(cnvData->locale[0]=='j' && i==JISX208) {

+ /*

+ * Only add code points that map to Shift-JIS codes

+ * corresponding to JIS X 0208.

+ */

+ filter=UCNV_SET_FILTER_SJIS;

+ } else if(i==KSC5601) {

+ /*

+ * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)

+ * are broader than GR94.

+ */

+ filter=UCNV_SET_FILTER_GR94DBCS;

+ } else {

+ filter=UCNV_SET_FILTER_NONE;

+ }

+ ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);

+ }

+ /*

+ * ISO 2022 converters must not convert SO/SI/ESC despite what

+ * sub-converters do by themselves.

+ * Remove these characters from the set.

+ */

+ sa->remove(sa->set, 0x0e);

+ sa->remove(sa->set, 0x0f);

+ sa->remove(sa->set, 0x1b);

+ /* ISO 2022 converters do not convert C1 controls either */

+ sa->removeRange(sa->set, 0x80, 0x9f);

+static const UConverterImpl _ISO2022Impl={ /* function table for the generic ISO-2022 converter; entries are positional - check their order against the UConverterImpl declaration */

+ UCNV_ISO_2022,

+ NULL,

+ _ISO2022Open, /* open/close/reset hooks; the same three are listed in every ISO-2022 function table in this file */

+ _ISO2022Close,

+ _ISO2022Reset,

+#ifdef U_ENABLE_GENERIC_ISO_2022

+ T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,

+ ucnv_fromUnicode_UTF8, /* when the generic converter is enabled, fromUnicode uses the UTF-8 functions */

+ ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,

+#else

+ NULL, /* generic converter disabled: no conversion function is supplied here */

+#endif

+ NULL,

+ _ISO2022getName,

+ _ISO_2022_WriteSub,

+ _ISO_2022_SafeClone,

+ _ISO_2022_GetUnicodeSet

+};

+static const UConverterStaticData _ISO2022StaticData={ /* static description of the generic ISO-2022 converter; positional initializer */

+ sizeof(UConverterStaticData),

+ "ISO_2022",

+ 2022, /* presumably the codepage number - TODO confirm against UConverterStaticData */

+ UCNV_IBM,

+ UCNV_ISO_2022,

+ 1,

+ 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */

+ { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes, with the length in the next entry - TODO confirm */

+ 1,

+ FALSE,

+ 0,

+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

+};

+const UConverterSharedData _ISO2022Data={ /* not static, unlike the JP/KR/CN shared data; ties the static data and the function table together */

+ sizeof(UConverterSharedData),

+ ~((uint32_t) 0),

+ NULL,

+ &_ISO2022StaticData,

+ FALSE,

+ &_ISO2022Impl,

+ 0

+};

+/*************JP****************/

+static const UConverterImpl _ISO2022JPImpl={ /* function table for ISO-2022-JP; entries are positional - check their order against the UConverterImpl declaration */

+ UCNV_ISO_2022,

+ NULL,

+ _ISO2022Open, /* open/close/reset hooks; the same three are listed in every ISO-2022 function table in this file */

+ _ISO2022Close,

+ _ISO2022Reset,

+ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* JP-specific toUnicode function */

+ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* JP-specific fromUnicode function */

+ NULL,

+ _ISO2022getName,

+ _ISO_2022_WriteSub,

+ _ISO_2022_SafeClone,

+ _ISO_2022_GetUnicodeSet

+};

+static const UConverterStaticData _ISO2022JPStaticData={ /* static description of the ISO-2022-JP converter; positional initializer */

+ sizeof(UConverterStaticData),

+ "ISO_2022_JP",

+ 0,

+ UCNV_IBM,

+ UCNV_ISO_2022,

+ 1,

+ 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */

+ { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes, with the length in the next entry - TODO confirm */

+ 1,

+ FALSE,

+ 0,

+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

+};

+static const UConverterSharedData _ISO2022JPData={ /* ties the JP static data and function table together */

+ sizeof(UConverterSharedData),

+ ~((uint32_t) 0),

+ NULL,

+ &_ISO2022JPStaticData,

+ FALSE,

+ &_ISO2022JPImpl,

+ 0

+};

+/************* KR ***************/

+static const UConverterImpl _ISO2022KRImpl={ /* function table for ISO-2022-KR; entries are positional - check their order against the UConverterImpl declaration */

+ UCNV_ISO_2022,

+ NULL,

+ _ISO2022Open, /* open/close/reset hooks; the same three are listed in every ISO-2022 function table in this file */

+ _ISO2022Close,

+ _ISO2022Reset,

+ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* KR-specific toUnicode function */

+ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* KR-specific fromUnicode function */

+ NULL,

+ _ISO2022getName,

+ _ISO_2022_WriteSub,

+ _ISO_2022_SafeClone,

+ _ISO_2022_GetUnicodeSet

+};

+static const UConverterStaticData _ISO2022KRStaticData={ /* static description of the ISO-2022-KR converter; positional initializer */

+ sizeof(UConverterStaticData),

+ "ISO_2022_KR",

+ 0,

+ UCNV_IBM,

+ UCNV_ISO_2022,

+ 1,

+ 3, /* max 3 bytes per UChar: SO+DBCS */

+ { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes, with the length in the next entry - TODO confirm */

+ 1,

+ FALSE,

+ 0,

+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

+};

+static const UConverterSharedData _ISO2022KRData={ /* ties the KR static data and function table together */

+ sizeof(UConverterSharedData),

+ ~((uint32_t) 0),

+ NULL,

+ &_ISO2022KRStaticData,

+ FALSE,

+ &_ISO2022KRImpl,

+ 0

+};

+/*************** CN ***************/

+static const UConverterImpl _ISO2022CNImpl={ /* function table for ISO-2022-CN; entries are positional - check their order against the UConverterImpl declaration */

+ UCNV_ISO_2022,

+ NULL,

+ _ISO2022Open, /* open/close/reset hooks; the same three are listed in every ISO-2022 function table in this file */

+ _ISO2022Close,

+ _ISO2022Reset,

+ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* CN-specific toUnicode function */

+ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* CN-specific fromUnicode function */

+ NULL,

+ _ISO2022getName,

+ _ISO_2022_WriteSub,

+ _ISO_2022_SafeClone,

+ _ISO_2022_GetUnicodeSet

+};

+static const UConverterStaticData _ISO2022CNStaticData={ /* static description of the ISO-2022-CN converter; positional initializer */

+ sizeof(UConverterStaticData),

+ "ISO_2022_CN",

+ 0,

+ UCNV_IBM,

+ UCNV_ISO_2022,

+ 1,

+ 8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */

+ { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes, with the length in the next entry - TODO confirm */

+ 1,

+ FALSE,

+ 0,

+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

+};

+static const UConverterSharedData _ISO2022CNData={ /* ties the CN static data and function table together */

+ sizeof(UConverterSharedData),

+ ~((uint32_t) 0),

+ NULL,

+ &_ISO2022CNStaticData,

+ FALSE,

+ &_ISO2022CNImpl,

+ 0

+};

+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

Property changes on: icu46/source/common/ucnv2022.c

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/common/ucnv.c ('k') | icu46/source/common/ucnv_bld.h » ('j') | no next file with comments »