Index: icu46/source/common/ucnvscsu.c |
=================================================================== |
--- icu46/source/common/ucnvscsu.c (revision 0) |
+++ icu46/source/common/ucnvscsu.c (revision 0) |
@@ -0,0 +1,2017 @@ |
+/* |
+****************************************************************************** |
+* |
+* Copyright (C) 2000-2009, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+****************************************************************************** |
+* file name: ucnvscsu.c |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* created on: 2000nov18 |
+* created by: Markus W. Scherer |
+* |
+* This is an implementation of the Standard Compression Scheme for Unicode |
+* as defined in http://www.unicode.org/unicode/reports/tr6/ . |
+* Reserved commands and window settings are treated as illegal sequences and |
+* will result in callback calls. |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_CONVERSION |
+ |
+#include "unicode/ucnv.h" |
+#include "unicode/ucnv_cb.h" |
+#include "ucnv_bld.h" |
+#include "ucnv_cnv.h" |
+#include "cmemory.h" |
+ |
+/* SCSU definitions --------------------------------------------------------- */ |
+ |
+/* SCSU command byte values */ |
+enum { |
+ SQ0=0x01, /* Quote from window pair 0 */ |
+ SQ7=0x08, /* Quote from window pair 7 */ |
+ SDX=0x0B, /* Define a window as extended */ |
+ Srs=0x0C, /* reserved */ |
+ SQU=0x0E, /* Quote a single Unicode character */ |
+ SCU=0x0F, /* Change to Unicode mode */ |
+ SC0=0x10, /* Select window 0 */ |
+ SC7=0x17, /* Select window 7 */ |
+ SD0=0x18, /* Define and select window 0 */ |
+ SD7=0x1F, /* Define and select window 7 */ |
+ |
+ UC0=0xE0, /* Select window 0 */ |
+ UC7=0xE7, /* Select window 7 */ |
+ UD0=0xE8, /* Define and select window 0 */ |
+ UD7=0xEF, /* Define and select window 7 */ |
+ UQU=0xF0, /* Quote a single Unicode character */ |
+ UDX=0xF1, /* Define a Window as extended */ |
+ Urs=0xF2 /* reserved */ |
+}; |
+ |
+enum { |
+ /* |
+ * Unicode code points from 3400 to E000 are not adressible by |
+ * dynamic window, since in these areas no short run alphabets are |
+ * found. Therefore add gapOffset to all values from gapThreshold. |
+ */ |
+ gapThreshold=0x68, |
+ gapOffset=0xAC00, |
+ |
+ /* values between reservedStart and fixedThreshold are reserved */ |
+ reservedStart=0xA8, |
+ |
+ /* use table of predefined fixed offsets for values from fixedThreshold */ |
+ fixedThreshold=0xF9 |
+}; |
+ |
+/* constant offsets for the 8 static windows */ |
+static const uint32_t staticOffsets[8]={ |
+ 0x0000, /* ASCII for quoted tags */ |
+ 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ |
+ 0x0100, /* Latin Extended-A */ |
+ 0x0300, /* Combining Diacritical Marks */ |
+ 0x2000, /* General Punctuation */ |
+ 0x2080, /* Currency Symbols */ |
+ 0x2100, /* Letterlike Symbols and Number Forms */ |
+ 0x3000 /* CJK Symbols and punctuation */ |
+}; |
+ |
+/* initial offsets for the 8 dynamic (sliding) windows */ |
+static const uint32_t initialDynamicOffsets[8]={ |
+ 0x0080, /* Latin-1 */ |
+ 0x00C0, /* Latin Extended A */ |
+ 0x0400, /* Cyrillic */ |
+ 0x0600, /* Arabic */ |
+ 0x0900, /* Devanagari */ |
+ 0x3040, /* Hiragana */ |
+ 0x30A0, /* Katakana */ |
+ 0xFF00 /* Fullwidth ASCII */ |
+}; |
+ |
+/* Table of fixed predefined Offsets */ |
+static const uint32_t fixedOffsets[]={ |
+ /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ |
+ /* 0xFA */ 0x0250, /* IPA extensions */ |
+ /* 0xFB */ 0x0370, /* Greek */ |
+ /* 0xFC */ 0x0530, /* Armenian */ |
+ /* 0xFD */ 0x3040, /* Hiragana */ |
+ /* 0xFE */ 0x30A0, /* Katakana */ |
+ /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ |
+}; |
+ |
+/* state values */ |
+enum { |
+ readCommand, |
+ quotePairOne, |
+ quotePairTwo, |
+ quoteOne, |
+ definePairOne, |
+ definePairTwo, |
+ defineOne |
+}; |
+ |
+typedef struct SCSUData { |
+ /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ |
+ uint32_t toUDynamicOffsets[8]; |
+ uint32_t fromUDynamicOffsets[8]; |
+ |
+ /* state machine state - toUnicode */ |
+ UBool toUIsSingleByteMode; |
+ uint8_t toUState; |
+ int8_t toUQuoteWindow, toUDynamicWindow; |
+ uint8_t toUByteOne; |
+ uint8_t toUPadding[3]; |
+ |
+ /* state machine state - fromUnicode */ |
+ UBool fromUIsSingleByteMode; |
+ int8_t fromUDynamicWindow; |
+ |
+ /* |
+ * windowUse[] keeps track of the use of the dynamic windows: |
+ * At nextWindowUseIndex there is the least recently used window, |
+ * and the following windows (in a wrapping manner) are more and more |
+ * recently used. |
+ * At nextWindowUseIndex-1 there is the most recently used window. |
+ */ |
+ uint8_t locale; |
+ int8_t nextWindowUseIndex; |
+ int8_t windowUse[8]; |
+} SCSUData; |
+ |
+static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; |
+static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; |
+ |
+enum { |
+ lGeneric, l_ja |
+}; |
+ |
+/* SCSU setup functions ----------------------------------------------------- */ |
+ |
+static void |
+_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { |
+ SCSUData *scsu=(SCSUData *)cnv->extraInfo; |
+ |
+ if(choice<=UCNV_RESET_TO_UNICODE) { |
+ /* reset toUnicode */ |
+ uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); |
+ |
+ scsu->toUIsSingleByteMode=TRUE; |
+ scsu->toUState=readCommand; |
+ scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; |
+ scsu->toUByteOne=0; |
+ |
+ cnv->toULength=0; |
+ } |
+ if(choice!=UCNV_RESET_TO_UNICODE) { |
+ /* reset fromUnicode */ |
+ uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); |
+ |
+ scsu->fromUIsSingleByteMode=TRUE; |
+ scsu->fromUDynamicWindow=0; |
+ |
+ scsu->nextWindowUseIndex=0; |
+ switch(scsu->locale) { |
+ case l_ja: |
+ uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); |
+ break; |
+ default: |
+ uprv_memcpy(scsu->windowUse, initialWindowUse, 8); |
+ break; |
+ } |
+ |
+ cnv->fromUChar32=0; |
+ } |
+} |
+ |
+static void |
+_SCSUOpen(UConverter *cnv, |
+ UConverterLoadArgs *pArgs, |
+ UErrorCode *pErrorCode) { |
+ const char *locale=pArgs->locale; |
+ if(pArgs->onlyTestIsLoadable) { |
+ return; |
+ } |
+ cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); |
+ if(cnv->extraInfo!=NULL) { |
+ if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { |
+ ((SCSUData *)cnv->extraInfo)->locale=l_ja; |
+ } else { |
+ ((SCSUData *)cnv->extraInfo)->locale=lGeneric; |
+ } |
+ _SCSUReset(cnv, UCNV_RESET_BOTH); |
+ } else { |
+ *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
+ } |
+ |
+ /* Set the substitution character U+fffd as a Unicode string. */ |
+ cnv->subUChars[0]=0xfffd; |
+ cnv->subCharLen=-1; |
+} |
+ |
+static void |
+_SCSUClose(UConverter *cnv) { |
+ if(cnv->extraInfo!=NULL) { |
+ if(!cnv->isExtraLocal) { |
+ uprv_free(cnv->extraInfo); |
+ } |
+ cnv->extraInfo=NULL; |
+ } |
+} |
+ |
+/* SCSU-to-Unicode conversion functions ------------------------------------- */ |
+ |
+static void |
+_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, |
+ UErrorCode *pErrorCode) { |
+ UConverter *cnv; |
+ SCSUData *scsu; |
+ const uint8_t *source, *sourceLimit; |
+ UChar *target; |
+ const UChar *targetLimit; |
+ int32_t *offsets; |
+ UBool isSingleByteMode; |
+ uint8_t state, byteOne; |
+ int8_t quoteWindow, dynamicWindow; |
+ |
+ int32_t sourceIndex, nextSourceIndex; |
+ |
+ uint8_t b; |
+ |
+ /* set up the local pointers */ |
+ cnv=pArgs->converter; |
+ scsu=(SCSUData *)cnv->extraInfo; |
+ |
+ source=(const uint8_t *)pArgs->source; |
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit; |
+ target=pArgs->target; |
+ targetLimit=pArgs->targetLimit; |
+ offsets=pArgs->offsets; |
+ |
+ /* get the state machine state */ |
+ isSingleByteMode=scsu->toUIsSingleByteMode; |
+ state=scsu->toUState; |
+ quoteWindow=scsu->toUQuoteWindow; |
+ dynamicWindow=scsu->toUDynamicWindow; |
+ byteOne=scsu->toUByteOne; |
+ |
+ /* sourceIndex=-1 if the current character began in the previous buffer */ |
+ sourceIndex=state==readCommand ? 0 : -1; |
+ nextSourceIndex=0; |
+ |
+ /* |
+ * conversion "loop" |
+ * |
+ * For performance, this is not a normal C loop. |
+ * Instead, there are two code blocks for the two SCSU modes. |
+ * The function branches to either one, and a change of the mode is done with a goto to |
+ * the other branch. |
+ * |
+ * Each branch has two conventional loops: |
+ * - a fast-path loop for the most common codes in the mode |
+ * - a loop for all other codes in the mode |
+ * When the fast-path runs into a code that it cannot handle, its loop ends and it |
+ * runs into the following loop to handle the other codes. |
+ * The end of the input or output buffer is also handled by the slower loop. |
+ * The slow loop jumps (goto) to the fast-path loop again as soon as possible. |
+ * |
+ * The callback handling is done by returning with an error code. |
+ * The conversion framework actually calls the callback function. |
+ */ |
+ if(isSingleByteMode) { |
+ /* fast path for single-byte mode */ |
+ if(state==readCommand) { |
+fastSingle: |
+ while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { |
+ ++source; |
+ ++nextSourceIndex; |
+ if(b<=0x7f) { |
+ /* write US-ASCII graphic character or DEL */ |
+ *target++=(UChar)b; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ } else { |
+ /* write from dynamic window */ |
+ uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); |
+ if(c<=0xffff) { |
+ *target++=(UChar)c; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ } else { |
+ /* output surrogate pair */ |
+ *target++=(UChar)(0xd7c0+(c>>10)); |
+ if(target<targetLimit) { |
+ *target++=(UChar)(0xdc00|(c&0x3ff)); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ *offsets++=sourceIndex; |
+ } |
+ } else { |
+ /* target overflow */ |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
+ cnv->UCharErrorBufferLength=1; |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ goto endloop; |
+ } |
+ } |
+ } |
+ sourceIndex=nextSourceIndex; |
+ } |
+ } |
+ |
+ /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ |
+singleByteMode: |
+ while(source<sourceLimit) { |
+ if(target>=targetLimit) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ b=*source++; |
+ ++nextSourceIndex; |
+ switch(state) { |
+ case readCommand: |
+ /* redundant conditions are commented out */ |
+ /* here: b<0x20 because otherwise we would be in fastSingle */ |
+ if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
+ /* CR/LF/TAB/NUL */ |
+ *target++=(UChar)b; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ sourceIndex=nextSourceIndex; |
+ goto fastSingle; |
+ } else if(SC0<=b) { |
+ if(b<=SC7) { |
+ dynamicWindow=(int8_t)(b-SC0); |
+ sourceIndex=nextSourceIndex; |
+ goto fastSingle; |
+ } else /* if(SD0<=b && b<=SD7) */ { |
+ dynamicWindow=(int8_t)(b-SD0); |
+ state=defineOne; |
+ } |
+ } else if(/* SQ0<=b && */ b<=SQ7) { |
+ quoteWindow=(int8_t)(b-SQ0); |
+ state=quoteOne; |
+ } else if(b==SDX) { |
+ state=definePairOne; |
+ } else if(b==SQU) { |
+ state=quotePairOne; |
+ } else if(b==SCU) { |
+ sourceIndex=nextSourceIndex; |
+ isSingleByteMode=FALSE; |
+ goto fastUnicode; |
+ } else /* Srs */ { |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ goto endloop; |
+ } |
+ |
+ /* store the first byte of a multibyte sequence in toUBytes[] */ |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ break; |
+ case quotePairOne: |
+ byteOne=b; |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ state=quotePairTwo; |
+ break; |
+ case quotePairTwo: |
+ *target++=(UChar)((byteOne<<8)|b); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ sourceIndex=nextSourceIndex; |
+ state=readCommand; |
+ goto fastSingle; |
+ case quoteOne: |
+ if(b<0x80) { |
+ /* all static offsets are in the BMP */ |
+ *target++=(UChar)(staticOffsets[quoteWindow]+b); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ } else { |
+ /* write from dynamic window */ |
+ uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); |
+ if(c<=0xffff) { |
+ *target++=(UChar)c; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ } else { |
+ /* output surrogate pair */ |
+ *target++=(UChar)(0xd7c0+(c>>10)); |
+ if(target<targetLimit) { |
+ *target++=(UChar)(0xdc00|(c&0x3ff)); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ *offsets++=sourceIndex; |
+ } |
+ } else { |
+ /* target overflow */ |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
+ cnv->UCharErrorBufferLength=1; |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ goto endloop; |
+ } |
+ } |
+ } |
+ sourceIndex=nextSourceIndex; |
+ state=readCommand; |
+ goto fastSingle; |
+ case definePairOne: |
+ dynamicWindow=(int8_t)((b>>5)&7); |
+ byteOne=(uint8_t)(b&0x1f); |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ state=definePairTwo; |
+ break; |
+ case definePairTwo: |
+ scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); |
+ sourceIndex=nextSourceIndex; |
+ state=readCommand; |
+ goto fastSingle; |
+ case defineOne: |
+ if(b==0) { |
+ /* callback(illegal): Reserved window offset value 0 */ |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ goto endloop; |
+ } else if(b<gapThreshold) { |
+ scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; |
+ } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { |
+ scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; |
+ } else if(b>=fixedThreshold) { |
+ scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; |
+ } else { |
+ /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ goto endloop; |
+ } |
+ sourceIndex=nextSourceIndex; |
+ state=readCommand; |
+ goto fastSingle; |
+ } |
+ } |
+ } else { |
+ /* fast path for Unicode mode */ |
+ if(state==readCommand) { |
+fastUnicode: |
+ while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { |
+ *target++=(UChar)((b<<8)|source[1]); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ sourceIndex=nextSourceIndex; |
+ nextSourceIndex+=2; |
+ source+=2; |
+ } |
+ } |
+ |
+ /* normal state machine for Unicode mode */ |
+/* unicodeByteMode: */ |
+ while(source<sourceLimit) { |
+ if(target>=targetLimit) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ b=*source++; |
+ ++nextSourceIndex; |
+ switch(state) { |
+ case readCommand: |
+ if((uint8_t)(b-UC0)>(Urs-UC0)) { |
+ byteOne=b; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=quotePairTwo; |
+ } else if(/* UC0<=b && */ b<=UC7) { |
+ dynamicWindow=(int8_t)(b-UC0); |
+ sourceIndex=nextSourceIndex; |
+ isSingleByteMode=TRUE; |
+ goto fastSingle; |
+ } else if(/* UD0<=b && */ b<=UD7) { |
+ dynamicWindow=(int8_t)(b-UD0); |
+ isSingleByteMode=TRUE; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=defineOne; |
+ goto singleByteMode; |
+ } else if(b==UDX) { |
+ isSingleByteMode=TRUE; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=definePairOne; |
+ goto singleByteMode; |
+ } else if(b==UQU) { |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=quotePairOne; |
+ } else /* Urs */ { |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ goto endloop; |
+ } |
+ break; |
+ case quotePairOne: |
+ byteOne=b; |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ state=quotePairTwo; |
+ break; |
+ case quotePairTwo: |
+ *target++=(UChar)((byteOne<<8)|b); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ sourceIndex=nextSourceIndex; |
+ state=readCommand; |
+ goto fastUnicode; |
+ } |
+ } |
+ } |
+endloop: |
+ |
+ /* set the converter state back into UConverter */ |
+ if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { |
+ /* reset to deal with the next character */ |
+ state=readCommand; |
+ } else if(state==readCommand) { |
+ /* not in a multi-byte sequence, reset toULength */ |
+ cnv->toULength=0; |
+ } |
+ scsu->toUIsSingleByteMode=isSingleByteMode; |
+ scsu->toUState=state; |
+ scsu->toUQuoteWindow=quoteWindow; |
+ scsu->toUDynamicWindow=dynamicWindow; |
+ scsu->toUByteOne=byteOne; |
+ |
+ /* write back the updated pointers */ |
+ pArgs->source=(const char *)source; |
+ pArgs->target=target; |
+ pArgs->offsets=offsets; |
+ return; |
+} |
+ |
+/* |
+ * Identical to _SCSUToUnicodeWithOffsets but without offset handling. |
+ * If a change is made in the original function, then either |
+ * change this function the same way or |
+ * re-copy the original function and remove the variables |
+ * offsets, sourceIndex, and nextSourceIndex. |
+ */ |
+static void |
+_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, |
+ UErrorCode *pErrorCode) { |
+ UConverter *cnv; |
+ SCSUData *scsu; |
+ const uint8_t *source, *sourceLimit; |
+ UChar *target; |
+ const UChar *targetLimit; |
+ UBool isSingleByteMode; |
+ uint8_t state, byteOne; |
+ int8_t quoteWindow, dynamicWindow; |
+ |
+ uint8_t b; |
+ |
+ /* set up the local pointers */ |
+ cnv=pArgs->converter; |
+ scsu=(SCSUData *)cnv->extraInfo; |
+ |
+ source=(const uint8_t *)pArgs->source; |
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit; |
+ target=pArgs->target; |
+ targetLimit=pArgs->targetLimit; |
+ |
+ /* get the state machine state */ |
+ isSingleByteMode=scsu->toUIsSingleByteMode; |
+ state=scsu->toUState; |
+ quoteWindow=scsu->toUQuoteWindow; |
+ dynamicWindow=scsu->toUDynamicWindow; |
+ byteOne=scsu->toUByteOne; |
+ |
+ /* |
+ * conversion "loop" |
+ * |
+ * For performance, this is not a normal C loop. |
+ * Instead, there are two code blocks for the two SCSU modes. |
+ * The function branches to either one, and a change of the mode is done with a goto to |
+ * the other branch. |
+ * |
+ * Each branch has two conventional loops: |
+ * - a fast-path loop for the most common codes in the mode |
+ * - a loop for all other codes in the mode |
+ * When the fast-path runs into a code that it cannot handle, its loop ends and it |
+ * runs into the following loop to handle the other codes. |
+ * The end of the input or output buffer is also handled by the slower loop. |
+ * The slow loop jumps (goto) to the fast-path loop again as soon as possible. |
+ * |
+ * The callback handling is done by returning with an error code. |
+ * The conversion framework actually calls the callback function. |
+ */ |
+ if(isSingleByteMode) { |
+ /* fast path for single-byte mode */ |
+ if(state==readCommand) { |
+fastSingle: |
+ while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { |
+ ++source; |
+ if(b<=0x7f) { |
+ /* write US-ASCII graphic character or DEL */ |
+ *target++=(UChar)b; |
+ } else { |
+ /* write from dynamic window */ |
+ uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); |
+ if(c<=0xffff) { |
+ *target++=(UChar)c; |
+ } else { |
+ /* output surrogate pair */ |
+ *target++=(UChar)(0xd7c0+(c>>10)); |
+ if(target<targetLimit) { |
+ *target++=(UChar)(0xdc00|(c&0x3ff)); |
+ } else { |
+ /* target overflow */ |
+ cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
+ cnv->UCharErrorBufferLength=1; |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ goto endloop; |
+ } |
+ } |
+ } |
+ } |
+ } |
+ |
+ /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ |
+singleByteMode: |
+ while(source<sourceLimit) { |
+ if(target>=targetLimit) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ b=*source++; |
+ switch(state) { |
+ case readCommand: |
+ /* redundant conditions are commented out */ |
+ /* here: b<0x20 because otherwise we would be in fastSingle */ |
+ if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
+ /* CR/LF/TAB/NUL */ |
+ *target++=(UChar)b; |
+ goto fastSingle; |
+ } else if(SC0<=b) { |
+ if(b<=SC7) { |
+ dynamicWindow=(int8_t)(b-SC0); |
+ goto fastSingle; |
+ } else /* if(SD0<=b && b<=SD7) */ { |
+ dynamicWindow=(int8_t)(b-SD0); |
+ state=defineOne; |
+ } |
+ } else if(/* SQ0<=b && */ b<=SQ7) { |
+ quoteWindow=(int8_t)(b-SQ0); |
+ state=quoteOne; |
+ } else if(b==SDX) { |
+ state=definePairOne; |
+ } else if(b==SQU) { |
+ state=quotePairOne; |
+ } else if(b==SCU) { |
+ isSingleByteMode=FALSE; |
+ goto fastUnicode; |
+ } else /* Srs */ { |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ goto endloop; |
+ } |
+ |
+ /* store the first byte of a multibyte sequence in toUBytes[] */ |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ break; |
+ case quotePairOne: |
+ byteOne=b; |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ state=quotePairTwo; |
+ break; |
+ case quotePairTwo: |
+ *target++=(UChar)((byteOne<<8)|b); |
+ state=readCommand; |
+ goto fastSingle; |
+ case quoteOne: |
+ if(b<0x80) { |
+ /* all static offsets are in the BMP */ |
+ *target++=(UChar)(staticOffsets[quoteWindow]+b); |
+ } else { |
+ /* write from dynamic window */ |
+ uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); |
+ if(c<=0xffff) { |
+ *target++=(UChar)c; |
+ } else { |
+ /* output surrogate pair */ |
+ *target++=(UChar)(0xd7c0+(c>>10)); |
+ if(target<targetLimit) { |
+ *target++=(UChar)(0xdc00|(c&0x3ff)); |
+ } else { |
+ /* target overflow */ |
+ cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); |
+ cnv->UCharErrorBufferLength=1; |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ goto endloop; |
+ } |
+ } |
+ } |
+ state=readCommand; |
+ goto fastSingle; |
+ case definePairOne: |
+ dynamicWindow=(int8_t)((b>>5)&7); |
+ byteOne=(uint8_t)(b&0x1f); |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ state=definePairTwo; |
+ break; |
+ case definePairTwo: |
+ scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); |
+ state=readCommand; |
+ goto fastSingle; |
+ case defineOne: |
+ if(b==0) { |
+ /* callback(illegal): Reserved window offset value 0 */ |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ goto endloop; |
+ } else if(b<gapThreshold) { |
+ scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; |
+ } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { |
+ scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; |
+ } else if(b>=fixedThreshold) { |
+ scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; |
+ } else { |
+ /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ goto endloop; |
+ } |
+ state=readCommand; |
+ goto fastSingle; |
+ } |
+ } |
+ } else { |
+ /* fast path for Unicode mode */ |
+ if(state==readCommand) { |
+fastUnicode: |
+ while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { |
+ *target++=(UChar)((b<<8)|source[1]); |
+ source+=2; |
+ } |
+ } |
+ |
+ /* normal state machine for Unicode mode */ |
+/* unicodeByteMode: */ |
+ while(source<sourceLimit) { |
+ if(target>=targetLimit) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ b=*source++; |
+ switch(state) { |
+ case readCommand: |
+ if((uint8_t)(b-UC0)>(Urs-UC0)) { |
+ byteOne=b; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=quotePairTwo; |
+ } else if(/* UC0<=b && */ b<=UC7) { |
+ dynamicWindow=(int8_t)(b-UC0); |
+ isSingleByteMode=TRUE; |
+ goto fastSingle; |
+ } else if(/* UD0<=b && */ b<=UD7) { |
+ dynamicWindow=(int8_t)(b-UD0); |
+ isSingleByteMode=TRUE; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=defineOne; |
+ goto singleByteMode; |
+ } else if(b==UDX) { |
+ isSingleByteMode=TRUE; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=definePairOne; |
+ goto singleByteMode; |
+ } else if(b==UQU) { |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ state=quotePairOne; |
+ } else /* Urs */ { |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ cnv->toUBytes[0]=b; |
+ cnv->toULength=1; |
+ goto endloop; |
+ } |
+ break; |
+ case quotePairOne: |
+ byteOne=b; |
+ cnv->toUBytes[1]=b; |
+ cnv->toULength=2; |
+ state=quotePairTwo; |
+ break; |
+ case quotePairTwo: |
+ *target++=(UChar)((byteOne<<8)|b); |
+ state=readCommand; |
+ goto fastUnicode; |
+ } |
+ } |
+ } |
+endloop: |
+ |
+ /* set the converter state back into UConverter */ |
+ if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { |
+ /* reset to deal with the next character */ |
+ state=readCommand; |
+ } else if(state==readCommand) { |
+ /* not in a multi-byte sequence, reset toULength */ |
+ cnv->toULength=0; |
+ } |
+ scsu->toUIsSingleByteMode=isSingleByteMode; |
+ scsu->toUState=state; |
+ scsu->toUQuoteWindow=quoteWindow; |
+ scsu->toUDynamicWindow=dynamicWindow; |
+ scsu->toUByteOne=byteOne; |
+ |
+ /* write back the updated pointers */ |
+ pArgs->source=(const char *)source; |
+ pArgs->target=target; |
+ return; |
+} |
+ |
+/* SCSU-from-Unicode conversion functions ----------------------------------- */ |
+ |
+/* |
+ * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve |
+ * reasonable results. The lookahead is minimal. |
+ * Many cases are simple: |
+ * A character fits directly into the current mode, a dynamic or static window, |
+ * or is not compressible. These cases are tested first. |
+ * Real compression heuristics are applied to the rest, in code branches for |
+ * single/Unicode mode and BMP/supplementary code points. |
+ * The heuristics used here are extremely simple. |
+ */ |
+ |
+/* get the number of the window that this character is in, or -1 */ |
+static int8_t |
+getWindow(const uint32_t offsets[8], uint32_t c) { |
+ int i; |
+ for(i=0; i<8; ++i) { |
+ if((uint32_t)(c-offsets[i])<=0x7f) { |
+ return (int8_t)(i); |
+ } |
+ } |
+ return -1; |
+} |
+ |
+/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */ |
+static UBool |
+isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { |
+ return (UBool)(c<=offset+0x7f && |
+ (c>=offset || (c<=0x7f && |
+ (c>=0x20 || (1UL<<c)&0x2601)))); |
+ /* binary 0010 0110 0000 0001, |
+ check for b==0xd || b==0xa || b==9 || b==0 */ |
+} |
+ |
+/* |
+ * getNextDynamicWindow returns the next dynamic window to be redefined |
+ */ |
+static int8_t |
+getNextDynamicWindow(SCSUData *scsu) { |
+ int8_t window=scsu->windowUse[scsu->nextWindowUseIndex]; |
+ if(++scsu->nextWindowUseIndex==8) { |
+ scsu->nextWindowUseIndex=0; |
+ } |
+ return window; |
+} |
+ |
+/* |
+ * useDynamicWindow() adjusts |
+ * windowUse[] and nextWindowUseIndex for the algorithm to choose |
+ * the next dynamic window to be defined; |
+ * a subclass may override it and provide its own algorithm. |
+ */ |
+static void |
+useDynamicWindow(SCSUData *scsu, int8_t window) { |
+ /* |
+ * move the existing window, which just became the most recently used one, |
+ * up in windowUse[] to nextWindowUseIndex-1 |
+ */ |
+ |
+ /* first, find the index of the window - backwards to favor the more recently used windows */ |
+ int i, j; |
+ |
+ i=scsu->nextWindowUseIndex; |
+ do { |
+ if(--i<0) { |
+ i=7; |
+ } |
+ } while(scsu->windowUse[i]!=window); |
+ |
+ /* now copy each windowUse[i+1] to [i] */ |
+ j=i+1; |
+ if(j==8) { |
+ j=0; |
+ } |
+ while(j!=scsu->nextWindowUseIndex) { |
+ scsu->windowUse[i]=scsu->windowUse[j]; |
+ i=j; |
+ if(++j==8) { j=0; } |
+ } |
+ |
+ /* finally, set the window into the most recently used index */ |
+ scsu->windowUse[i]=window; |
+} |
+ |
+/* |
+ * calculate the offset and the code for a dynamic window that contains the character |
+ * takes fixed offsets into account |
+ * the offset of the window is stored in the offset variable, |
+ * the code is returned |
+ * |
+ * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code |
+ */ |
+static int |
+getDynamicOffset(uint32_t c, uint32_t *pOffset) { |
+ int i; |
+ |
+ for(i=0; i<7; ++i) { |
+ if((uint32_t)(c-fixedOffsets[i])<=0x7f) { |
+ *pOffset=fixedOffsets[i]; |
+ return 0xf9+i; |
+ } |
+ } |
+ |
+ if(c<0x80) { |
+ /* No dynamic window for US-ASCII. */ |
+ return -1; |
+ } else if(c<0x3400 || |
+ (uint32_t)(c-0x10000)<(0x14000-0x10000) || |
+ (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) |
+ ) { |
+ /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ |
+ *pOffset=c&0x7fffff80; |
+ return (int)(c>>7); |
+ } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { |
+ /* For these characters we need to take the gapOffset into account. */ |
+ *pOffset=c&0x7fffff80; |
+ return (int)((c-gapOffset)>>7); |
+ } else { |
+ return -1; |
+ } |
+} |
+ |
+/* |
+ * Idea for compression: |
+ * - save SCSUData and other state before really starting work |
+ * - at endloop, see if compression could be better with just unicode mode |
+ * - don't do this if a callback has been called |
+ * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning |
+ * - different buffer handling! |
+ * |
+ * Drawback or need for corrective handling: |
+ * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and |
+ * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible |
+ * not only for compression but also for HTML/XML documents with following charset/encoding announcers. |
+ * |
+ * How to achieve both? |
+ * - Only replace the result after an SDX or SCU? |
+ */ |
+ |
+static void |
+_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, |
+ UErrorCode *pErrorCode) { |
+ UConverter *cnv; |
+ SCSUData *scsu; |
+ const UChar *source, *sourceLimit; |
+ uint8_t *target; |
+ int32_t targetCapacity; |
+ int32_t *offsets; |
+ |
+ UBool isSingleByteMode; |
+ uint8_t dynamicWindow; |
+ uint32_t currentOffset; |
+ |
+ uint32_t c, delta; |
+ |
+ int32_t sourceIndex, nextSourceIndex; |
+ |
+ int32_t length; |
+ |
+ /* variables for compression heuristics */ |
+ uint32_t offset; |
+ UChar lead, trail; |
+ int code; |
+ int8_t window; |
+ |
+ /* set up the local pointers */ |
+ cnv=pArgs->converter; |
+ scsu=(SCSUData *)cnv->extraInfo; |
+ |
+ /* set up the local pointers */ |
+ source=pArgs->source; |
+ sourceLimit=pArgs->sourceLimit; |
+ target=(uint8_t *)pArgs->target; |
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); |
+ offsets=pArgs->offsets; |
+ |
+ /* get the state machine state */ |
+ isSingleByteMode=scsu->fromUIsSingleByteMode; |
+ dynamicWindow=scsu->fromUDynamicWindow; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ |
+ c=cnv->fromUChar32; |
+ |
+ /* sourceIndex=-1 if the current character began in the previous buffer */ |
+ sourceIndex= c==0 ? 0 : -1; |
+ nextSourceIndex=0; |
+ |
+ /* similar conversion "loop" as in toUnicode */ |
+loop: |
+ if(isSingleByteMode) { |
+ if(c!=0 && targetCapacity>0) { |
+ goto getTrailSingle; |
+ } |
+ |
+ /* state machine for single-byte mode */ |
+/* singleByteMode: */ |
+ while(source<sourceLimit) { |
+ if(targetCapacity<=0) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ c=*source++; |
+ ++nextSourceIndex; |
+ |
+ if((c-0x20)<=0x5f) { |
+ /* pass US-ASCII graphic character through */ |
+ *target++=(uint8_t)c; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ --targetCapacity; |
+ } else if(c<0x20) { |
+ if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
+ /* CR/LF/TAB/NUL */ |
+ *target++=(uint8_t)c; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ --targetCapacity; |
+ } else { |
+ /* quote C0 control character */ |
+ c|=SQ0<<8; |
+ length=2; |
+ goto outputBytes; |
+ } |
+ } else if((delta=c-currentOffset)<=0x7f) { |
+ /* use the current dynamic window */ |
+ *target++=(uint8_t)(delta|0x80); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ --targetCapacity; |
+ } else if(UTF_IS_SURROGATE(c)) { |
+ if(UTF_IS_SURROGATE_FIRST(c)) { |
+getTrailSingle: |
+ lead=(UChar)c; |
+ if(source<sourceLimit) { |
+ /* test the following code unit */ |
+ trail=*source; |
+ if(UTF_IS_SECOND_SURROGATE(trail)) { |
+ ++source; |
+ ++nextSourceIndex; |
+ c=UTF16_GET_PAIR_VALUE(c, trail); |
+ /* convert this surrogate code point */ |
+ /* exit this condition tree */ |
+ } else { |
+ /* this is an unmatched lead code unit (1st surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ } else { |
+ /* no more input */ |
+ break; |
+ } |
+ } else { |
+ /* this is an unmatched trail code unit (2nd surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ |
+ /* compress supplementary character U+10000..U+10ffff */ |
+ if((delta=c-currentOffset)<=0x7f) { |
+ /* use the current dynamic window */ |
+ *target++=(uint8_t)(delta|0x80); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ --targetCapacity; |
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
+ /* there is a dynamic window that contains this character, change to it */ |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else if((code=getDynamicOffset(c, &offset))>=0) { |
+ /* might check if there are more characters in this window to come */ |
+ /* define an extended window with this character */ |
+ code-=0x200; |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=4; |
+ goto outputBytes; |
+ } else { |
+ /* change to Unicode mode and output this (lead, trail) pair */ |
+ isSingleByteMode=FALSE; |
+ *target++=(uint8_t)SCU; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ --targetCapacity; |
+ c=((uint32_t)lead<<16)|trail; |
+ length=4; |
+ goto outputBytes; |
+ } |
+ } else if(c<0xa0) { |
+ /* quote C1 control character */ |
+ c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ |
+ length=2; |
+ goto outputBytes; |
+ } else if(c==0xfeff || c>=0xfff0) { |
+ /* quote signature character=byte order mark and specials */ |
+ c|=SQU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } else { |
+ /* compress all other BMP characters */ |
+ if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
+ /* there is a window defined that contains this character - switch to it or quote from it? */ |
+ if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { |
+ /* change to dynamic window */ |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else { |
+ /* quote from dynamic window */ |
+ c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } |
+ } else if((window=getWindow(staticOffsets, c))>=0) { |
+ /* quote from static window */ |
+ c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); |
+ length=2; |
+ goto outputBytes; |
+ } else if((code=getDynamicOffset(c, &offset))>=0) { |
+ /* define a dynamic window with this character */ |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=3; |
+ goto outputBytes; |
+ } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && |
+ (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
+ ) { |
+ /* |
+ * this character is not compressible (a BMP ideograph or similar); |
+ * switch to Unicode mode if this is the last character in the block |
+ * or there is at least one more ideograph following immediately |
+ */ |
+ isSingleByteMode=FALSE; |
+ c|=SCU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } else { |
+ /* quote Unicode */ |
+ c|=SQU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } |
+ } |
+ |
+ /* normal end of conversion: prepare for a new character */ |
+ c=0; |
+ sourceIndex=nextSourceIndex; |
+ } |
+ } else { |
+ if(c!=0 && targetCapacity>0) { |
+ goto getTrailUnicode; |
+ } |
+ |
+ /* state machine for Unicode mode */ |
+/* unicodeByteMode: */ |
+ while(source<sourceLimit) { |
+ if(targetCapacity<=0) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ c=*source++; |
+ ++nextSourceIndex; |
+ |
+ if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { |
+ /* not compressible, write character directly */ |
+ if(targetCapacity>=2) { |
+ *target++=(uint8_t)(c>>8); |
+ *target++=(uint8_t)c; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ *offsets++=sourceIndex; |
+ } |
+ targetCapacity-=2; |
+ } else { |
+ length=2; |
+ goto outputBytes; |
+ } |
+ } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { |
+ /* compress BMP character if the following one is not an uncompressible ideograph */ |
+ if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { |
+ if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { |
+ /* ASCII digit or letter */ |
+ isSingleByteMode=TRUE; |
+ c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; |
+ length=2; |
+ goto outputBytes; |
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
+ /* there is a dynamic window that contains this character, change to it */ |
+ isSingleByteMode=TRUE; |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else if((code=getDynamicOffset(c, &offset))>=0) { |
+ /* define a dynamic window with this character */ |
+ isSingleByteMode=TRUE; |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=3; |
+ goto outputBytes; |
+ } |
+ } |
+ |
+ /* don't know how to compress this character, just write it directly */ |
+ length=2; |
+ goto outputBytes; |
+ } else if(c<0xe000) { |
+ /* c is a surrogate */ |
+ if(UTF_IS_SURROGATE_FIRST(c)) { |
+getTrailUnicode: |
+ lead=(UChar)c; |
+ if(source<sourceLimit) { |
+ /* test the following code unit */ |
+ trail=*source; |
+ if(UTF_IS_SECOND_SURROGATE(trail)) { |
+ ++source; |
+ ++nextSourceIndex; |
+ c=UTF16_GET_PAIR_VALUE(c, trail); |
+ /* convert this surrogate code point */ |
+ /* exit this condition tree */ |
+ } else { |
+ /* this is an unmatched lead code unit (1st surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ } else { |
+ /* no more input */ |
+ break; |
+ } |
+ } else { |
+ /* this is an unmatched trail code unit (2nd surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ |
+ /* compress supplementary character */ |
+ if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && |
+ !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
+ ) { |
+ /* |
+ * there is a dynamic window that contains this character and |
+ * the following character is not uncompressible, |
+ * change to the window |
+ */ |
+ isSingleByteMode=TRUE; |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ |
+ (code=getDynamicOffset(c, &offset))>=0 |
+ ) { |
+ /* two supplementary characters in (probably) the same window - define an extended one */ |
+ isSingleByteMode=TRUE; |
+ code-=0x200; |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=4; |
+ goto outputBytes; |
+ } else { |
+ /* don't know how to compress this character, just write it directly */ |
+ c=((uint32_t)lead<<16)|trail; |
+ length=4; |
+ goto outputBytes; |
+ } |
+ } else /* 0xe000<=c<0xf300 */ { |
+ /* quote to avoid SCSU tags */ |
+ c|=UQU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } |
+ |
+ /* normal end of conversion: prepare for a new character */ |
+ c=0; |
+ sourceIndex=nextSourceIndex; |
+ } |
+ } |
+endloop: |
+ |
+ /* set the converter state back into UConverter */ |
+ scsu->fromUIsSingleByteMode=isSingleByteMode; |
+ scsu->fromUDynamicWindow=dynamicWindow; |
+ |
+ cnv->fromUChar32=c; |
+ |
+ /* write back the updated pointers */ |
+ pArgs->source=source; |
+ pArgs->target=(char *)target; |
+ pArgs->offsets=offsets; |
+ return; |
+ |
+outputBytes: |
+ /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ |
+ /* from the first if in the loop we know that targetCapacity>0 */ |
+ if(length<=targetCapacity) { |
+ if(offsets==NULL) { |
+ switch(length) { |
+ /* each branch falls through to the next one */ |
+ case 4: |
+ *target++=(uint8_t)(c>>24); |
+ case 3: |
+ *target++=(uint8_t)(c>>16); |
+ case 2: |
+ *target++=(uint8_t)(c>>8); |
+ case 1: |
+ *target++=(uint8_t)c; |
+ default: |
+ /* will never occur */ |
+ break; |
+ } |
+ } else { |
+ switch(length) { |
+ /* each branch falls through to the next one */ |
+ case 4: |
+ *target++=(uint8_t)(c>>24); |
+ *offsets++=sourceIndex; |
+ case 3: |
+ *target++=(uint8_t)(c>>16); |
+ *offsets++=sourceIndex; |
+ case 2: |
+ *target++=(uint8_t)(c>>8); |
+ *offsets++=sourceIndex; |
+ case 1: |
+ *target++=(uint8_t)c; |
+ *offsets++=sourceIndex; |
+ default: |
+ /* will never occur */ |
+ break; |
+ } |
+ } |
+ targetCapacity-=length; |
+ |
+ /* normal end of conversion: prepare for a new character */ |
+ c=0; |
+ sourceIndex=nextSourceIndex; |
+ goto loop; |
+ } else { |
+ uint8_t *p; |
+ |
+ /* |
+ * We actually do this backwards here: |
+ * In order to save an intermediate variable, we output |
+ * first to the overflow buffer what does not fit into the |
+ * regular target. |
+ */ |
+ /* we know that 0<=targetCapacity<length<=4 */ |
+ /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ |
+ length-=targetCapacity; |
+ p=(uint8_t *)cnv->charErrorBuffer; |
+ switch(length) { |
+ /* each branch falls through to the next one */ |
+ case 4: |
+ *p++=(uint8_t)(c>>24); |
+ case 3: |
+ *p++=(uint8_t)(c>>16); |
+ case 2: |
+ *p++=(uint8_t)(c>>8); |
+ case 1: |
+ *p=(uint8_t)c; |
+ default: |
+ /* will never occur */ |
+ break; |
+ } |
+ cnv->charErrorBufferLength=(int8_t)length; |
+ |
+ /* now output what fits into the regular target */ |
+ c>>=8*length; /* length was reduced by targetCapacity */ |
+ switch(targetCapacity) { |
+ /* each branch falls through to the next one */ |
+ case 3: |
+ *target++=(uint8_t)(c>>16); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ case 2: |
+ *target++=(uint8_t)(c>>8); |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ case 1: |
+ *target++=(uint8_t)c; |
+ if(offsets!=NULL) { |
+ *offsets++=sourceIndex; |
+ } |
+ default: |
+ break; |
+ } |
+ |
+ /* target overflow */ |
+ targetCapacity=0; |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ c=0; |
+ goto endloop; |
+ } |
+} |
+ |
+/* |
+ * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. |
+ * If a change is made in the original function, then either |
+ * change this function the same way or |
+ * re-copy the original function and remove the variables |
+ * offsets, sourceIndex, and nextSourceIndex. |
+ */ |
+static void |
+_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, |
+ UErrorCode *pErrorCode) { |
+ UConverter *cnv; |
+ SCSUData *scsu; |
+ const UChar *source, *sourceLimit; |
+ uint8_t *target; |
+ int32_t targetCapacity; |
+ |
+ UBool isSingleByteMode; |
+ uint8_t dynamicWindow; |
+ uint32_t currentOffset; |
+ |
+ uint32_t c, delta; |
+ |
+ int32_t length; |
+ |
+ /* variables for compression heuristics */ |
+ uint32_t offset; |
+ UChar lead, trail; |
+ int code; |
+ int8_t window; |
+ |
+ /* set up the local pointers */ |
+ cnv=pArgs->converter; |
+ scsu=(SCSUData *)cnv->extraInfo; |
+ |
+ /* set up the local pointers */ |
+ source=pArgs->source; |
+ sourceLimit=pArgs->sourceLimit; |
+ target=(uint8_t *)pArgs->target; |
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); |
+ |
+ /* get the state machine state */ |
+ isSingleByteMode=scsu->fromUIsSingleByteMode; |
+ dynamicWindow=scsu->fromUDynamicWindow; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ |
+ c=cnv->fromUChar32; |
+ |
+ /* similar conversion "loop" as in toUnicode */ |
+loop: |
+ if(isSingleByteMode) { |
+ if(c!=0 && targetCapacity>0) { |
+ goto getTrailSingle; |
+ } |
+ |
+ /* state machine for single-byte mode */ |
+/* singleByteMode: */ |
+ while(source<sourceLimit) { |
+ if(targetCapacity<=0) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ c=*source++; |
+ |
+ if((c-0x20)<=0x5f) { |
+ /* pass US-ASCII graphic character through */ |
+ *target++=(uint8_t)c; |
+ --targetCapacity; |
+ } else if(c<0x20) { |
+ if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { |
+ /* CR/LF/TAB/NUL */ |
+ *target++=(uint8_t)c; |
+ --targetCapacity; |
+ } else { |
+ /* quote C0 control character */ |
+ c|=SQ0<<8; |
+ length=2; |
+ goto outputBytes; |
+ } |
+ } else if((delta=c-currentOffset)<=0x7f) { |
+ /* use the current dynamic window */ |
+ *target++=(uint8_t)(delta|0x80); |
+ --targetCapacity; |
+ } else if(UTF_IS_SURROGATE(c)) { |
+ if(UTF_IS_SURROGATE_FIRST(c)) { |
+getTrailSingle: |
+ lead=(UChar)c; |
+ if(source<sourceLimit) { |
+ /* test the following code unit */ |
+ trail=*source; |
+ if(UTF_IS_SECOND_SURROGATE(trail)) { |
+ ++source; |
+ c=UTF16_GET_PAIR_VALUE(c, trail); |
+ /* convert this surrogate code point */ |
+ /* exit this condition tree */ |
+ } else { |
+ /* this is an unmatched lead code unit (1st surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ } else { |
+ /* no more input */ |
+ break; |
+ } |
+ } else { |
+ /* this is an unmatched trail code unit (2nd surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ |
+ /* compress supplementary character U+10000..U+10ffff */ |
+ if((delta=c-currentOffset)<=0x7f) { |
+ /* use the current dynamic window */ |
+ *target++=(uint8_t)(delta|0x80); |
+ --targetCapacity; |
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
+ /* there is a dynamic window that contains this character, change to it */ |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else if((code=getDynamicOffset(c, &offset))>=0) { |
+ /* might check if there are more characters in this window to come */ |
+ /* define an extended window with this character */ |
+ code-=0x200; |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=4; |
+ goto outputBytes; |
+ } else { |
+ /* change to Unicode mode and output this (lead, trail) pair */ |
+ isSingleByteMode=FALSE; |
+ *target++=(uint8_t)SCU; |
+ --targetCapacity; |
+ c=((uint32_t)lead<<16)|trail; |
+ length=4; |
+ goto outputBytes; |
+ } |
+ } else if(c<0xa0) { |
+ /* quote C1 control character */ |
+ c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ |
+ length=2; |
+ goto outputBytes; |
+ } else if(c==0xfeff || c>=0xfff0) { |
+ /* quote signature character=byte order mark and specials */ |
+ c|=SQU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } else { |
+ /* compress all other BMP characters */ |
+ if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
+ /* there is a window defined that contains this character - switch to it or quote from it? */ |
+ if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { |
+ /* change to dynamic window */ |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else { |
+ /* quote from dynamic window */ |
+ c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } |
+ } else if((window=getWindow(staticOffsets, c))>=0) { |
+ /* quote from static window */ |
+ c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); |
+ length=2; |
+ goto outputBytes; |
+ } else if((code=getDynamicOffset(c, &offset))>=0) { |
+ /* define a dynamic window with this character */ |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=3; |
+ goto outputBytes; |
+ } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && |
+ (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
+ ) { |
+ /* |
+ * this character is not compressible (a BMP ideograph or similar); |
+ * switch to Unicode mode if this is the last character in the block |
+ * or there is at least one more ideograph following immediately |
+ */ |
+ isSingleByteMode=FALSE; |
+ c|=SCU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } else { |
+ /* quote Unicode */ |
+ c|=SQU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } |
+ } |
+ |
+ /* normal end of conversion: prepare for a new character */ |
+ c=0; |
+ } |
+ } else { |
+ if(c!=0 && targetCapacity>0) { |
+ goto getTrailUnicode; |
+ } |
+ |
+ /* state machine for Unicode mode */ |
+/* unicodeByteMode: */ |
+ while(source<sourceLimit) { |
+ if(targetCapacity<=0) { |
+ /* target is full */ |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ break; |
+ } |
+ c=*source++; |
+ |
+ if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { |
+ /* not compressible, write character directly */ |
+ if(targetCapacity>=2) { |
+ *target++=(uint8_t)(c>>8); |
+ *target++=(uint8_t)c; |
+ targetCapacity-=2; |
+ } else { |
+ length=2; |
+ goto outputBytes; |
+ } |
+ } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { |
+ /* compress BMP character if the following one is not an uncompressible ideograph */ |
+ if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { |
+ if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { |
+ /* ASCII digit or letter */ |
+ isSingleByteMode=TRUE; |
+ c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; |
+ length=2; |
+ goto outputBytes; |
+ } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { |
+ /* there is a dynamic window that contains this character, change to it */ |
+ isSingleByteMode=TRUE; |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else if((code=getDynamicOffset(c, &offset))>=0) { |
+ /* define a dynamic window with this character */ |
+ isSingleByteMode=TRUE; |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=3; |
+ goto outputBytes; |
+ } |
+ } |
+ |
+ /* don't know how to compress this character, just write it directly */ |
+ length=2; |
+ goto outputBytes; |
+ } else if(c<0xe000) { |
+ /* c is a surrogate */ |
+ if(UTF_IS_SURROGATE_FIRST(c)) { |
+getTrailUnicode: |
+ lead=(UChar)c; |
+ if(source<sourceLimit) { |
+ /* test the following code unit */ |
+ trail=*source; |
+ if(UTF_IS_SECOND_SURROGATE(trail)) { |
+ ++source; |
+ c=UTF16_GET_PAIR_VALUE(c, trail); |
+ /* convert this surrogate code point */ |
+ /* exit this condition tree */ |
+ } else { |
+ /* this is an unmatched lead code unit (1st surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ } else { |
+ /* no more input */ |
+ break; |
+ } |
+ } else { |
+ /* this is an unmatched trail code unit (2nd surrogate) */ |
+ /* callback(illegal) */ |
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND; |
+ goto endloop; |
+ } |
+ |
+ /* compress supplementary character */ |
+ if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && |
+ !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) |
+ ) { |
+ /* |
+ * there is a dynamic window that contains this character and |
+ * the following character is not uncompressible, |
+ * change to the window |
+ */ |
+ isSingleByteMode=TRUE; |
+ dynamicWindow=window; |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; |
+ length=2; |
+ goto outputBytes; |
+ } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ |
+ (code=getDynamicOffset(c, &offset))>=0 |
+ ) { |
+ /* two supplementary characters in (probably) the same window - define an extended one */ |
+ isSingleByteMode=TRUE; |
+ code-=0x200; |
+ dynamicWindow=getNextDynamicWindow(scsu); |
+ currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; |
+ useDynamicWindow(scsu, dynamicWindow); |
+ c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; |
+ length=4; |
+ goto outputBytes; |
+ } else { |
+ /* don't know how to compress this character, just write it directly */ |
+ c=((uint32_t)lead<<16)|trail; |
+ length=4; |
+ goto outputBytes; |
+ } |
+ } else /* 0xe000<=c<0xf300 */ { |
+ /* quote to avoid SCSU tags */ |
+ c|=UQU<<16; |
+ length=3; |
+ goto outputBytes; |
+ } |
+ |
+ /* normal end of conversion: prepare for a new character */ |
+ c=0; |
+ } |
+ } |
+endloop: |
+ |
+ /* set the converter state back into UConverter */ |
+ scsu->fromUIsSingleByteMode=isSingleByteMode; |
+ scsu->fromUDynamicWindow=dynamicWindow; |
+ |
+ cnv->fromUChar32=c; |
+ |
+ /* write back the updated pointers */ |
+ pArgs->source=source; |
+ pArgs->target=(char *)target; |
+ return; |
+ |
+outputBytes: |
+ /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ |
+ /* from the first if in the loop we know that targetCapacity>0 */ |
+ if(length<=targetCapacity) { |
+ switch(length) { |
+ /* each branch falls through to the next one */ |
+ case 4: |
+ *target++=(uint8_t)(c>>24); |
+ case 3: |
+ *target++=(uint8_t)(c>>16); |
+ case 2: |
+ *target++=(uint8_t)(c>>8); |
+ case 1: |
+ *target++=(uint8_t)c; |
+ default: |
+ /* will never occur */ |
+ break; |
+ } |
+ targetCapacity-=length; |
+ |
+ /* normal end of conversion: prepare for a new character */ |
+ c=0; |
+ goto loop; |
+ } else { |
+ uint8_t *p; |
+ |
+ /* |
+ * We actually do this backwards here: |
+ * In order to save an intermediate variable, we output |
+ * first to the overflow buffer what does not fit into the |
+ * regular target. |
+ */ |
+ /* we know that 0<=targetCapacity<length<=4 */ |
+ /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ |
+ length-=targetCapacity; |
+ p=(uint8_t *)cnv->charErrorBuffer; |
+ switch(length) { |
+ /* each branch falls through to the next one */ |
+ case 4: |
+ *p++=(uint8_t)(c>>24); |
+ case 3: |
+ *p++=(uint8_t)(c>>16); |
+ case 2: |
+ *p++=(uint8_t)(c>>8); |
+ case 1: |
+ *p=(uint8_t)c; |
+ default: |
+ /* will never occur */ |
+ break; |
+ } |
+ cnv->charErrorBufferLength=(int8_t)length; |
+ |
+ /* now output what fits into the regular target */ |
+ c>>=8*length; /* length was reduced by targetCapacity */ |
+ switch(targetCapacity) { |
+ /* each branch falls through to the next one */ |
+ case 3: |
+ *target++=(uint8_t)(c>>16); |
+ case 2: |
+ *target++=(uint8_t)(c>>8); |
+ case 1: |
+ *target++=(uint8_t)c; |
+ default: |
+ break; |
+ } |
+ |
+ /* target overflow */ |
+ targetCapacity=0; |
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
+ c=0; |
+ goto endloop; |
+ } |
+} |
+ |
+/* miscellaneous ------------------------------------------------------------ */ |
+ |
+static const char * |
+_SCSUGetName(const UConverter *cnv) { |
+ SCSUData *scsu=(SCSUData *)cnv->extraInfo; |
+ |
+ switch(scsu->locale) { |
+ case l_ja: |
+ return "SCSU,locale=ja"; |
+ default: |
+ return "SCSU"; |
+ } |
+} |
+ |
+/* structure for SafeClone calculations */ |
+struct cloneSCSUStruct |
+{ |
+ UConverter cnv; |
+ SCSUData mydata; |
+}; |
+ |
+static UConverter * |
+_SCSUSafeClone(const UConverter *cnv, |
+ void *stackBuffer, |
+ int32_t *pBufferSize, |
+ UErrorCode *status) |
+{ |
+ struct cloneSCSUStruct * localClone; |
+ int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); |
+ |
+ if (U_FAILURE(*status)){ |
+ return 0; |
+ } |
+ |
+ if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ |
+ *pBufferSize = bufferSizeNeeded; |
+ return 0; |
+ } |
+ |
+ localClone = (struct cloneSCSUStruct *)stackBuffer; |
+ /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
+ |
+ uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); |
+ localClone->cnv.extraInfo = &localClone->mydata; |
+ localClone->cnv.isExtraLocal = TRUE; |
+ |
+ return &localClone->cnv; |
+} |
+ |
+ |
+static const UConverterImpl _SCSUImpl={ |
+ UCNV_SCSU, |
+ |
+ NULL, |
+ NULL, |
+ |
+ _SCSUOpen, |
+ _SCSUClose, |
+ _SCSUReset, |
+ |
+ _SCSUToUnicode, |
+ _SCSUToUnicodeWithOffsets, |
+ _SCSUFromUnicode, |
+ _SCSUFromUnicodeWithOffsets, |
+ NULL, |
+ |
+ NULL, |
+ _SCSUGetName, |
+ NULL, |
+ _SCSUSafeClone, |
+ ucnv_getCompleteUnicodeSet |
+}; |
+ |
+static const UConverterStaticData _SCSUStaticData={ |
+ sizeof(UConverterStaticData), |
+ "SCSU", |
+ 1212, /* CCSID for SCSU */ |
+ UCNV_IBM, UCNV_SCSU, |
+ 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ |
+ /* |
+ * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode |
+ * substitution string. |
+ */ |
+ { 0x0e, 0xff, 0xfd, 0 }, 3, |
+ FALSE, FALSE, |
+ 0, |
+ 0, |
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
+}; |
+ |
+const UConverterSharedData _SCSUData={ |
+ sizeof(UConverterSharedData), ~((uint32_t)0), |
+ NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl, |
+ 0 |
+}; |
+ |
+#endif |
Property changes on: icu46/source/common/ucnvscsu.c |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |