OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2000-2014, International Business Machines | 3 * Copyright (C) 2000-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * file name: ucnv2022.cpp | 6 * file name: ucnv2022.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
11 * created on: 2000feb03 | 11 * created on: 2000feb03 |
12 * created by: Markus W. Scherer | 12 * created by: Markus W. Scherer |
13 * | 13 * |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
68 * This means, for example, that when ISO-8859-7 is designated, the following | 68 * This means, for example, that when ISO-8859-7 is designated, the following |
69 * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. | 69 * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. |
70 * The ICU ISO-2022 converter does not handle this - and has no information | 70 * The ICU ISO-2022 converter does not handle this - and has no information |
71 * about which subconverter would have to be shifted vs. which is designed | 71 * about which subconverter would have to be shifted vs. which is designed |
72 * for 7-bit ISO-2022. | 72 * for 7-bit ISO-2022. |
73 * | 73 * |
74 * Markus Scherer 2003-dec-03 | 74 * Markus Scherer 2003-dec-03 |
75 */ | 75 */ |
76 #endif | 76 #endif |
77 | 77 |
| 78 #if !UCONFIG_ONLY_HTML_CONVERSION |
78 static const char SHIFT_IN_STR[] = "\x0F"; | 79 static const char SHIFT_IN_STR[] = "\x0F"; |
79 // static const char SHIFT_OUT_STR[] = "\x0E"; | 80 // static const char SHIFT_OUT_STR[] = "\x0E"; |
| 81 #endif |
80 | 82 |
81 #define CR 0x0D | 83 #define CR 0x0D |
82 #define LF 0x0A | 84 #define LF 0x0A |
83 #define H_TAB 0x09 | 85 #define H_TAB 0x09 |
84 #define V_TAB 0x0B | 86 #define V_TAB 0x0B |
85 #define SPACE 0x20 | 87 #define SPACE 0x20 |
86 | 88 |
87 enum { | 89 enum { |
88 HWKANA_START=0xff61, | 90 HWKANA_START=0xff61, |
89 HWKANA_END=0xff9f | 91 HWKANA_END=0xff9f |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
145 CNS_11643_1, | 147 CNS_11643_1, |
146 CNS_11643_2, | 148 CNS_11643_2, |
147 CNS_11643_3, | 149 CNS_11643_3, |
148 CNS_11643_4, | 150 CNS_11643_4, |
149 CNS_11643_5, | 151 CNS_11643_5, |
150 CNS_11643_6, | 152 CNS_11643_6, |
151 CNS_11643_7 | 153 CNS_11643_7 |
152 } StateEnum; | 154 } StateEnum; |
153 | 155 |
154 /* is the StateEnum charset value for a DBCS charset? */ | 156 /* is the StateEnum charset value for a DBCS charset? */ |
155 #if UCONFIG_NO_NON_HTML5_CONVERSION | 157 #if UCONFIG_ONLY_HTML_CONVERSION |
156 #define IS_JP_DBCS(cs) (JISX208==(cs)) | 158 #define IS_JP_DBCS(cs) (JISX208==(cs)) |
157 #else | 159 #else |
158 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) | 160 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
159 #endif | 161 #endif |
160 | 162 |
161 #define CSM(cs) ((uint16_t)1<<(cs)) | 163 #define CSM(cs) ((uint16_t)1<<(cs)) |
162 | 164 |
163 /* | 165 /* |
164 * Each of these charset masks (with index x) contains a bit for a charset in ex
act correspondence | 166 * Each of these charset masks (with index x) contains a bit for a charset in ex
act correspondence |
165 * to whether that charset is used in the corresponding version x of ISO_2022,lo
cale=ja,version=x | 167 * to whether that charset is used in the corresponding version x of ISO_2022,lo
cale=ja,version=x |
166 * | 168 * |
167 * Note: The converter uses some leniency: | 169 * Note: The converter uses some leniency: |
168 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in | 170 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in |
169 * all versions, not just JIS7 and JIS8. | 171 * all versions, not just JIS7 and JIS8. |
170 * - ICU does not distinguish between different versions of JIS X 0208. | 172 * - ICU does not distinguish between different versions of JIS X 0208. |
171 */ | 173 */ |
172 #if UCONFIG_NO_NON_HTML5_CONVERSION | 174 #if UCONFIG_ONLY_HTML_CONVERSION |
173 enum { MAX_JA_VERSION=0 }; | 175 enum { MAX_JA_VERSION=0 }; |
174 #else | 176 #else |
175 enum { MAX_JA_VERSION=4 }; | 177 enum { MAX_JA_VERSION=4 }; |
176 #endif | 178 #endif |
177 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 179 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
178 /* | |
179 * TODO(jshin): The encoding spec has JISX212, but we don't support it. | |
180 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 | |
181 */ | |
182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 180 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
183 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 181 #if !UCONFIG_ONLY_HTML_CONVERSION |
184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 183 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
188 #endif | 186 #endif |
189 }; | 187 }; |
190 | 188 |
191 typedef enum { | 189 typedef enum { |
192 ASCII1=0, | 190 ASCII1=0, |
193 LATIN1, | 191 LATIN1, |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 368 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 369 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 | 370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 |
373 }; | 371 }; |
374 | 372 |
375 /* Type def for refactoring changeState_2022 code*/ | 373 /* Type def for refactoring changeState_2022 code*/ |
376 typedef enum{ | 374 typedef enum{ |
377 #ifdef U_ENABLE_GENERIC_ISO_2022 | 375 #ifdef U_ENABLE_GENERIC_ISO_2022 |
378 ISO_2022=0, | 376 ISO_2022=0, |
379 #endif | 377 #endif |
380 #if UCONFIG_NO_NON_HTML5_CONVERSION | |
381 ISO_2022_JP=1 | |
382 #else | |
383 ISO_2022_JP=1, | 378 ISO_2022_JP=1, |
| 379 #if !UCONFIG_ONLY_HTML_CONVERSION |
384 ISO_2022_KR=2, | 380 ISO_2022_KR=2, |
385 ISO_2022_CN=3 | 381 ISO_2022_CN=3 |
386 #endif | 382 #endif |
387 } Variant2022; | 383 } Variant2022; |
388 | 384 |
389 /*********** ISO 2022 Converter Protos ***********/ | 385 /*********** ISO 2022 Converter Protos ***********/ |
390 static void | 386 static void |
391 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); | 387 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); |
392 | 388 |
393 static void | 389 static void |
(...skipping 13 matching lines...) Expand all Loading... |
407 | 403 |
408 #ifdef U_ENABLE_GENERIC_ISO_2022 | 404 #ifdef U_ENABLE_GENERIC_ISO_2022 |
409 static void | 405 static void |
410 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UEr
rorCode* err); | 406 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UEr
rorCode* err); |
411 #endif | 407 #endif |
412 | 408 |
413 namespace { | 409 namespace { |
414 | 410 |
415 /*const UConverterSharedData _ISO2022Data;*/ | 411 /*const UConverterSharedData _ISO2022Data;*/ |
416 extern const UConverterSharedData _ISO2022JPData; | 412 extern const UConverterSharedData _ISO2022JPData; |
| 413 |
| 414 #if !UCONFIG_ONLY_HTML_CONVERSION |
417 extern const UConverterSharedData _ISO2022KRData; | 415 extern const UConverterSharedData _ISO2022KRData; |
418 extern const UConverterSharedData _ISO2022CNData; | 416 extern const UConverterSharedData _ISO2022CNData; |
| 417 #endif |
419 | 418 |
420 } // namespace | 419 } // namespace |
421 | 420 |
422 /*************** Converter implementations ******************/ | 421 /*************** Converter implementations ******************/ |
423 | 422 |
424 /* The purpose of this function is to get around gcc compiler warnings. */ | 423 /* The purpose of this function is to get around gcc compiler warnings. */ |
425 static inline void | 424 static inline void |
426 fromUWriteUInt8(UConverter *cnv, | 425 fromUWriteUInt8(UConverter *cnv, |
427 const char *bytes, int32_t length, | 426 const char *bytes, int32_t length, |
428 uint8_t **target, const char *targetLimit, | 427 uint8_t **target, const char *targetLimit, |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
493 myConverterData->version = version; | 492 myConverterData->version = version; |
494 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && | 493 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && |
495 (myLocale[2]=='_' || myLocale[2]=='\0')) | 494 (myLocale[2]=='_' || myLocale[2]=='\0')) |
496 { | 495 { |
497 size_t len=0; | 496 size_t len=0; |
498 /* open the required converters and cache them */ | 497 /* open the required converters and cache them */ |
499 if(version>MAX_JA_VERSION) { | 498 if(version>MAX_JA_VERSION) { |
500 /* prevent indexing beyond jpCharsetMasks[] */ | 499 /* prevent indexing beyond jpCharsetMasks[] */ |
501 myConverterData->version = version = 0; | 500 myConverterData->version = version = 0; |
502 } | 501 } |
503 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
504 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | 502 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
505 myConverterData->myConverterArray[ISO8859_7] = | 503 myConverterData->myConverterArray[ISO8859_7] = |
506 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); | 504 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); |
507 } | 505 } |
508 #endif | |
509 myConverterData->myConverterArray[JISX208] = | 506 myConverterData->myConverterArray[JISX208] = |
510 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); | 507 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); |
511 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
512 if(jpCharsetMasks[version]&CSM(JISX212)) { | 508 if(jpCharsetMasks[version]&CSM(JISX212)) { |
513 myConverterData->myConverterArray[JISX212] = | 509 myConverterData->myConverterArray[JISX212] = |
514 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); | 510 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); |
515 } | 511 } |
516 if(jpCharsetMasks[version]&CSM(GB2312)) { | 512 if(jpCharsetMasks[version]&CSM(GB2312)) { |
517 myConverterData->myConverterArray[GB2312] = | 513 myConverterData->myConverterArray[GB2312] = |
518 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ | 514 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ |
519 } | 515 } |
520 if(jpCharsetMasks[version]&CSM(KSC5601)) { | 516 if(jpCharsetMasks[version]&CSM(KSC5601)) { |
521 myConverterData->myConverterArray[KSC5601] = | 517 myConverterData->myConverterArray[KSC5601] = |
522 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); | 518 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); |
523 } | 519 } |
524 #endif | |
525 | 520 |
526 /* set the function pointers to appropriate functions */ | 521 /* set the function pointers to appropriate functions */ |
527 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 522 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
528 uprv_strcpy(myConverterData->locale,"ja"); | 523 uprv_strcpy(myConverterData->locale,"ja"); |
529 | 524 |
530 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); | 525 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); |
531 len = uprv_strlen(myConverterData->name); | 526 len = uprv_strlen(myConverterData->name); |
532 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; | 527 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; |
533 myConverterData->name[len+1]='\0'; | 528 myConverterData->name[len+1]='\0'; |
534 } | 529 } |
535 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 530 #if !UCONFIG_ONLY_HTML_CONVERSION |
536 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 531 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
537 (myLocale[2]=='_' || myLocale[2]=='\0')) | 532 (myLocale[2]=='_' || myLocale[2]=='\0')) |
538 { | 533 { |
539 const char *cnvName; | 534 const char *cnvName; |
540 if(version==1) { | 535 if(version==1) { |
541 cnvName="icu-internal-25546"; | 536 cnvName="icu-internal-25546"; |
542 } else { | 537 } else { |
543 cnvName="ibm-949"; | 538 cnvName="ibm-949"; |
544 myConverterData->version=version=0; | 539 myConverterData->version=version=0; |
545 } | 540 } |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
595 myConverterData->version = 0; | 590 myConverterData->version = 0; |
596 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); | 591 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); |
597 }else if (version==1){ | 592 }else if (version==1){ |
598 myConverterData->version = 1; | 593 myConverterData->version = 1; |
599 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); | 594 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); |
600 }else { | 595 }else { |
601 myConverterData->version = 2; | 596 myConverterData->version = 2; |
602 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); | 597 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); |
603 } | 598 } |
604 } | 599 } |
605 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION | 600 #endif // !UCONFIG_ONLY_HTML_CONVERSION |
606 else{ | 601 else{ |
607 #ifdef U_ENABLE_GENERIC_ISO_2022 | 602 #ifdef U_ENABLE_GENERIC_ISO_2022 |
608 myConverterData->isFirstBuffer = TRUE; | 603 myConverterData->isFirstBuffer = TRUE; |
609 | 604 |
610 /* append the UTF-8 escape sequence */ | 605 /* append the UTF-8 escape sequence */ |
611 cnv->charErrorBufferLength = 3; | 606 cnv->charErrorBufferLength = 3; |
612 cnv->charErrorBuffer[0] = 0x1b; | 607 cnv->charErrorBuffer[0] = 0x1b; |
613 cnv->charErrorBuffer[1] = 0x25; | 608 cnv->charErrorBuffer[1] = 0x25; |
614 cnv->charErrorBuffer[2] = 0x42; | 609 cnv->charErrorBuffer[2] = 0x42; |
615 | 610 |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
730 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 725 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
731 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE | 726 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE |
732 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 727 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
733 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE | 728 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE |
734 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 729 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
735 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 730 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 731 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 732 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
738 }; | 733 }; |
739 | 734 |
740 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 735 #if !UCONFIG_ONLY_HTML_CONVERSION |
741 /*************** to unicode *******************/ | 736 /*************** to unicode *******************/ |
742 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | 737 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
743 /* 0 1 2 3 4
5 6 7 8 9 */ | 738 /* 0 1 2 3 4
5 6 7 8 9 */ |
744 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 739 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
745 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 740 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
746 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 741 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 742 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 | 743 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 |
749 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 744 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 745 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
896 if(myData2022->toU2022State.g<2) { | 891 if(myData2022->toU2022State.g<2) { |
897 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; | 892 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; |
898 } | 893 } |
899 myData2022->toU2022State.g=2; | 894 myData2022->toU2022State.g=2; |
900 } else { | 895 } else { |
901 /* illegal to have SS2 before a matching designator */ | 896 /* illegal to have SS2 before a matching designator */ |
902 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 897 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
903 } | 898 } |
904 break; | 899 break; |
905 /* case SS3_STATE: not used in ISO-2022-JP-x */ | 900 /* case SS3_STATE: not used in ISO-2022-JP-x */ |
906 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
907 case ISO8859_1: | 901 case ISO8859_1: |
908 case ISO8859_7: | 902 case ISO8859_7: |
909 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 903 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
910 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 904 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
911 } else { | 905 } else { |
912 /* G2 charset for SS2 */ | 906 /* G2 charset for SS2 */ |
913 myData2022->toU2022State.cs[2]=(int8_t)tempState; | 907 myData2022->toU2022State.cs[2]=(int8_t)tempState; |
914 } | 908 } |
915 break; | 909 break; |
916 #endif | |
917 default: | 910 default: |
918 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 911 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
919 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 912 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
920 } else { | 913 } else { |
921 /* G0 charset */ | 914 /* G0 charset */ |
922 myData2022->toU2022State.cs[0]=(int8_t)tempState; | 915 myData2022->toU2022State.cs[0]=(int8_t)tempState; |
923 } | 916 } |
924 break; | 917 break; |
925 } | 918 } |
926 } | 919 } |
927 break; | 920 break; |
928 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 921 #if !UCONFIG_ONLY_HTML_CONVERSION |
929 case ISO_2022_CN: | 922 case ISO_2022_CN: |
930 { | 923 { |
931 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; | 924 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
932 switch(tempState) { | 925 switch(tempState) { |
933 case INVALID_STATE: | 926 case INVALID_STATE: |
934 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 927 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
935 break; | 928 break; |
936 case SS2_STATE: | 929 case SS2_STATE: |
937 if(myData2022->toU2022State.cs[2]!=0) { | 930 if(myData2022->toU2022State.cs[2]!=0) { |
938 if(myData2022->toU2022State.g<2) { | 931 if(myData2022->toU2022State.g<2) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
980 } | 973 } |
981 } | 974 } |
982 break; | 975 break; |
983 case ISO_2022_KR: | 976 case ISO_2022_KR: |
984 if(offset==0x30){ | 977 if(offset==0x30){ |
985 /* nothing to be done, just accept this one escape sequence */ | 978 /* nothing to be done, just accept this one escape sequence */ |
986 } else { | 979 } else { |
987 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 980 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
988 } | 981 } |
989 break; | 982 break; |
990 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 983 #endif // !UCONFIG_ONLY_HTML_CONVERSION |
991 | 984 |
992 default: | 985 default: |
993 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 986 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
994 break; | 987 break; |
995 } | 988 } |
996 } | 989 } |
997 if(U_SUCCESS(*err)) { | 990 if(U_SUCCESS(*err)) { |
998 _this->toULength = 0; | 991 _this->toULength = 0; |
999 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { | 992 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { |
1000 if(_this->toULength>1) { | 993 if(_this->toULength>1) { |
(...skipping 22 matching lines...) Expand all Loading... |
1023 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng
th); | 1016 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng
th); |
1024 *source-=bytesFromThisBuffer; | 1017 *source-=bytesFromThisBuffer; |
1025 } | 1018 } |
1026 _this->toULength=1; | 1019 _this->toULength=1; |
1027 } | 1020 } |
1028 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { | 1021 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { |
1029 _this->toUCallbackReason = UCNV_UNASSIGNED; | 1022 _this->toUCallbackReason = UCNV_UNASSIGNED; |
1030 } | 1023 } |
1031 } | 1024 } |
1032 | 1025 |
| 1026 #if !UCONFIG_ONLY_HTML_CONVERSION |
1033 /*Checks the characters of the buffer against valid 2022 escape sequences | 1027 /*Checks the characters of the buffer against valid 2022 escape sequences |
1034 *if they match we return a pointer to the initial start of the sequence otherwise | 1028 *if they match we return a pointer to the initial start of the sequence otherwise |
1035 *we return sourceLimit | 1029 *we return sourceLimit |
1036 */ | 1030 */ |
1037 /*for 2022 looks ahead in the stream | 1031 /*for 2022 looks ahead in the stream |
1038 *to determine the longest possible convertible | 1032 *to determine the longest possible convertible |
1039 *data stream | 1033 *data stream |
1040 */ | 1034 */ |
1041 static inline const char* | 1035 static inline const char* |
1042 getEndOfBuffer_2022(const char** source, | 1036 getEndOfBuffer_2022(const char** source, |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1077 }while (++mySource < sourceLimit); | 1071 }while (++mySource < sourceLimit); |
1078 | 1072 |
1079 return sourceLimit; | 1073 return sourceLimit; |
1080 #else | 1074 #else |
1081 while(mySource < sourceLimit && *mySource != ESC_2022) { | 1075 while(mySource < sourceLimit && *mySource != ESC_2022) { |
1082 ++mySource; | 1076 ++mySource; |
1083 } | 1077 } |
1084 return mySource; | 1078 return mySource; |
1085 #endif | 1079 #endif |
1086 } | 1080 } |
1087 | 1081 #endif |
1088 | 1082 |
1089 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmb
cs.c | 1083 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmb
cs.c |
1090 * any future change in _MBCSFromUChar32() function should be reflected here. | 1084 * any future change in _MBCSFromUChar32() function should be reflected here. |
1091 * @return number of bytes in *value; negative number if fallback; 0 if no mappi
ng | 1085 * @return number of bytes in *value; negative number if fallback; 0 if no mappi
ng |
1092 */ | 1086 */ |
1093 static inline int32_t | 1087 static inline int32_t |
1094 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, | 1088 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, |
1095 UChar32 c, | 1089 UChar32 c, |
1096 uint32_t* value, | 1090 uint32_t* value, |
1097 UBool useFallback, | 1091 UBool useFallback, |
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1401 * KSC5601 : alias to ibm-949 mapping table | 1395 * KSC5601 : alias to ibm-949 mapping table |
1402 * GB2312 : alias to ibm-1386 mapping table | 1396 * GB2312 : alias to ibm-1386 mapping table |
1403 * ISO-8859-1 : Algorithmic implemented as LATIN1 case | 1397 * ISO-8859-1 : Algorithmic implemented as LATIN1 case |
1404 * ISO-8859-7 : alias to ibm-9409 mapping table | 1398 * ISO-8859-7 : alias to ibm-9409 mapping table |
1405 */ | 1399 */ |
1406 | 1400 |
1407 /* preference order of JP charsets */ | 1401 /* preference order of JP charsets */ |
1408 static const StateEnum jpCharsetPref[]={ | 1402 static const StateEnum jpCharsetPref[]={ |
1409 ASCII, | 1403 ASCII, |
1410 JISX201, | 1404 JISX201, |
1411 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1412 ISO8859_1, | 1405 ISO8859_1, |
1413 ISO8859_7, | 1406 ISO8859_7, |
1414 #endif | |
1415 JISX208, | 1407 JISX208, |
1416 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1417 JISX212, | 1408 JISX212, |
1418 GB2312, | 1409 GB2312, |
1419 KSC5601, | 1410 KSC5601, |
1420 #endif | |
1421 HWKANA_7BIT | 1411 HWKANA_7BIT |
1422 }; | 1412 }; |
1423 | 1413 |
1424 /* | 1414 /* |
1425 * The escape sequences must be in order of the enum constants like JISX201 = 3
, | 1415 * The escape sequences must be in order of the enum constants like JISX201 = 3
, |
1426 * not in order of jpCharsetPref[]! | 1416 * not in order of jpCharsetPref[]! |
1427 */ | 1417 */ |
1428 static const char escSeqChars[][6] ={ | 1418 static const char escSeqChars[][6] ={ |
1429 "\x1B\x28\x42", /* <ESC>(B ASCII */ | 1419 "\x1B\x28\x42", /* <ESC>(B ASCII */ |
1430 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ | 1420 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ |
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1780 int8_t cs0 = choices[i]; | 1770 int8_t cs0 = choices[i]; |
1781 switch(cs0) { | 1771 switch(cs0) { |
1782 case ASCII: | 1772 case ASCII: |
1783 if(sourceChar <= 0x7f) { | 1773 if(sourceChar <= 0x7f) { |
1784 targetValue = (uint32_t)sourceChar; | 1774 targetValue = (uint32_t)sourceChar; |
1785 len = 1; | 1775 len = 1; |
1786 cs = cs0; | 1776 cs = cs0; |
1787 g = 0; | 1777 g = 0; |
1788 } | 1778 } |
1789 break; | 1779 break; |
1790 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1791 case ISO8859_1: | 1780 case ISO8859_1: |
1792 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { | 1781 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
1793 targetValue = (uint32_t)sourceChar - 0x80; | 1782 targetValue = (uint32_t)sourceChar - 0x80; |
1794 len = 1; | 1783 len = 1; |
1795 cs = cs0; | 1784 cs = cs0; |
1796 g = 2; | 1785 g = 2; |
1797 } | 1786 } |
1798 break; | 1787 break; |
1799 #endif | |
1800 case HWKANA_7BIT: | 1788 case HWKANA_7BIT: |
1801 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { | 1789 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { |
1802 if(converterData->version==3) { | 1790 if(converterData->version==3) { |
1803 /* JIS7: use G1 (SO) */ | 1791 /* JIS7: use G1 (SO) */ |
1804 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ | 1792 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ |
1805 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); | 1793 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); |
1806 len = 1; | 1794 len = 1; |
1807 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ | 1795 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ |
1808 g = 1; | 1796 g = 1; |
1809 } else if(converterData->version==4) { | 1797 } else if(converterData->version==4) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1851 } | 1839 } |
1852 } else if(len == 0 && useFallback && | 1840 } else if(len == 0 && useFallback && |
1853 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { | 1841 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { |
1854 targetValue = hwkana_fb[sourceChar - HWKANA_START]; | 1842 targetValue = hwkana_fb[sourceChar - HWKANA_START]; |
1855 len = -2; | 1843 len = -2; |
1856 cs = cs0; | 1844 cs = cs0; |
1857 g = 0; | 1845 g = 0; |
1858 useFallback = FALSE; | 1846 useFallback = FALSE; |
1859 } | 1847 } |
1860 break; | 1848 break; |
1861 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1862 case ISO8859_7: | 1849 case ISO8859_7: |
1863 /* G0 SBCS forced to 7-bit output */ | 1850 /* G0 SBCS forced to 7-bit output */ |
1864 len2 = MBCS_SINGLE_FROM_UCHAR32( | 1851 len2 = MBCS_SINGLE_FROM_UCHAR32( |
1865 converterData->myConverterArray[cs0], | 1852 converterData->myConverterArray[cs0], |
1866 sourceChar, &value, | 1853 sourceChar, &value, |
1867 useFallback); | 1854 useFallback); |
1868 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { | 1855 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { |
1869 targetValue = value - 0x80; | 1856 targetValue = value - 0x80; |
1870 len = len2; | 1857 len = len2; |
1871 cs = cs0; | 1858 cs = cs0; |
1872 g = 2; | 1859 g = 2; |
1873 useFallback = FALSE; | 1860 useFallback = FALSE; |
1874 } | 1861 } |
1875 break; | 1862 break; |
1876 #endif | |
1877 default: | 1863 default: |
1878 /* G0 DBCS */ | 1864 /* G0 DBCS */ |
1879 len2 = MBCS_FROM_UCHAR32_ISO2022( | 1865 len2 = MBCS_FROM_UCHAR32_ISO2022( |
1880 converterData->myConverterArray[cs0], | 1866 converterData->myConverterArray[cs0], |
1881 sourceChar, &value, | 1867 sourceChar, &value, |
1882 useFallback, MBCS_OUTPUT_2); | 1868 useFallback, MBCS_OUTPUT_2); |
1883 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ | 1869 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ |
1884 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1885 if(cs0 == KSC5601) { | 1870 if(cs0 == KSC5601) { |
1886 /* | 1871 /* |
1887 * Check for valid bytes for the encoding scheme. | 1872 * Check for valid bytes for the encoding scheme. |
1888 * This is necessary because the sub-converter (wind
ows-949) | 1873 * This is necessary because the sub-converter (wind
ows-949) |
1889 * has a broader encoding scheme than is valid for 2
022. | 1874 * has a broader encoding scheme than is valid for 2
022. |
1890 */ | 1875 */ |
1891 value = _2022FromGR94DBCS(value); | 1876 value = _2022FromGR94DBCS(value); |
1892 if(value == 0) { | 1877 if(value == 0) { |
1893 break; | 1878 break; |
1894 } | 1879 } |
1895 } | 1880 } |
1896 #endif | |
1897 targetValue = value; | 1881 targetValue = value; |
1898 len = len2; | 1882 len = len2; |
1899 cs = cs0; | 1883 cs = cs0; |
1900 g = 0; | 1884 g = 0; |
1901 useFallback = FALSE; | 1885 useFallback = FALSE; |
1902 } | 1886 } |
1903 break; | 1887 break; |
1904 } | 1888 } |
1905 } | 1889 } |
1906 | 1890 |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2180 /* return from a single-shift state to the previous one */ | 2164 /* return from a single-shift state to the previous one */ |
2181 if(pToU2022State->g >= 2) { | 2165 if(pToU2022State->g >= 2) { |
2182 pToU2022State->g=pToU2022State->prevG; | 2166 pToU2022State->g=pToU2022State->prevG; |
2183 } | 2167 } |
2184 } else switch(cs) { | 2168 } else switch(cs) { |
2185 case ASCII: | 2169 case ASCII: |
2186 if(mySourceChar <= 0x7f) { | 2170 if(mySourceChar <= 0x7f) { |
2187 targetUniChar = mySourceChar; | 2171 targetUniChar = mySourceChar; |
2188 } | 2172 } |
2189 break; | 2173 break; |
2190 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
2191 case ISO8859_1: | 2174 case ISO8859_1: |
2192 if(mySourceChar <= 0x7f) { | 2175 if(mySourceChar <= 0x7f) { |
2193 targetUniChar = mySourceChar + 0x80; | 2176 targetUniChar = mySourceChar + 0x80; |
2194 } | 2177 } |
2195 /* return from a single-shift state to the previous one */ | 2178 /* return from a single-shift state to the previous one */ |
2196 pToU2022State->g=pToU2022State->prevG; | 2179 pToU2022State->g=pToU2022State->prevG; |
2197 break; | 2180 break; |
2198 case ISO8859_7: | 2181 case ISO8859_7: |
2199 if(mySourceChar <= 0x7f) { | 2182 if(mySourceChar <= 0x7f) { |
2200 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ | 2183 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ |
2201 targetUniChar = | 2184 targetUniChar = |
2202 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( | 2185 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( |
2203 myData->myConverterArray[cs], | 2186 myData->myConverterArray[cs], |
2204 mySourceChar + 0x80); | 2187 mySourceChar + 0x80); |
2205 } | 2188 } |
2206 /* return from a single-shift state to the previous one */ | 2189 /* return from a single-shift state to the previous one */ |
2207 pToU2022State->g=pToU2022State->prevG; | 2190 pToU2022State->g=pToU2022State->prevG; |
2208 break; | 2191 break; |
2209 #endif | |
2210 case JISX201: | 2192 case JISX201: |
2211 if(mySourceChar <= 0x7f) { | 2193 if(mySourceChar <= 0x7f) { |
2212 targetUniChar = jisx201ToU(mySourceChar); | 2194 targetUniChar = jisx201ToU(mySourceChar); |
2213 } | 2195 } |
2214 break; | 2196 break; |
2215 case HWKANA_7BIT: | 2197 case HWKANA_7BIT: |
2216 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { | 2198 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { |
2217 /* 7-bit halfwidth Katakana */ | 2199 /* 7-bit halfwidth Katakana */ |
2218 targetUniChar = mySourceChar + (HWKANA_START - 0x21); | 2200 targetUniChar = mySourceChar + (HWKANA_START - 0x21); |
2219 } | 2201 } |
(...skipping 19 matching lines...) Expand all Loading... |
2239 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; | 2221 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; |
2240 if (leadIsOk && trailIsOk) { | 2222 if (leadIsOk && trailIsOk) { |
2241 ++mySource; | 2223 ++mySource; |
2242 tmpSourceChar = (mySourceChar << 8) | trailByte; | 2224 tmpSourceChar = (mySourceChar << 8) | trailByte; |
2243 if(cs == JISX208) { | 2225 if(cs == JISX208) { |
2244 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); | 2226 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); |
2245 mySourceChar = tmpSourceChar; | 2227 mySourceChar = tmpSourceChar; |
2246 } else { | 2228 } else { |
2247 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ | 2229 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ |
2248 mySourceChar = tmpSourceChar; | 2230 mySourceChar = tmpSourceChar; |
2249 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
2250 if (cs == KSC5601) { | 2231 if (cs == KSC5601) { |
2251 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ | 2232 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ |
2252 } | 2233 } |
2253 #endif | |
2254 tempBuf[0] = (char)(tmpSourceChar >> 8); | 2234 tempBuf[0] = (char)(tmpSourceChar >> 8); |
2255 tempBuf[1] = (char)(tmpSourceChar); | 2235 tempBuf[1] = (char)(tmpSourceChar); |
2256 } | 2236 } |
2257 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); | 2237 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); |
2258 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { | 2238 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { |
2259 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ | 2239 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ |
2260 ++mySource; | 2240 ++mySource; |
2261 /* add another bit so that the code below writes 2 b
ytes in case of error */ | 2241 /* add another bit so that the code below writes 2 b
ytes in case of error */ |
2262 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; | 2242 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; |
2263 } | 2243 } |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2305 *err =U_BUFFER_OVERFLOW_ERROR; | 2285 *err =U_BUFFER_OVERFLOW_ERROR; |
2306 break; | 2286 break; |
2307 } | 2287 } |
2308 } | 2288 } |
2309 endloop: | 2289 endloop: |
2310 args->target = myTarget; | 2290 args->target = myTarget; |
2311 args->source = mySource; | 2291 args->source = mySource; |
2312 } | 2292 } |
2313 | 2293 |
2314 | 2294 |
2315 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 2295 #if !UCONFIG_ONLY_HTML_CONVERSION |
2316 /*************************************************************** | 2296 /*************************************************************** |
2317 * Rules for ISO-2022-KR encoding | 2297 * Rules for ISO-2022-KR encoding |
2318 * i) The KSC5601 designator sequence should appear only once in a file, | 2298 * i) The KSC5601 designator sequence should appear only once in a file, |
2319 * at the beginning of a line before any KSC5601 characters. This usually | 2299 * at the beginning of a line before any KSC5601 characters. This usually |
2320 * means that it appears by itself on the first line of the file | 2300 * means that it appears by itself on the first line of the file |
2321 * ii) There are only 2 shifting sequences SO to shift into double byte mode | 2301 * ii) There are only 2 shifting sequences SO to shift into double byte mode |
2322 * and SI to shift into single byte mode | 2302 * and SI to shift into single byte mode |
2323 */ | 2303 */ |
2324 static void | 2304 static void |
2325 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ | 2305 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ |
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3449 } | 3429 } |
3450 else{ | 3430 else{ |
3451 *err =U_BUFFER_OVERFLOW_ERROR; | 3431 *err =U_BUFFER_OVERFLOW_ERROR; |
3452 break; | 3432 break; |
3453 } | 3433 } |
3454 } | 3434 } |
3455 endloop: | 3435 endloop: |
3456 args->target = myTarget; | 3436 args->target = myTarget; |
3457 args->source = mySource; | 3437 args->source = mySource; |
3458 } | 3438 } |
3459 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 3439 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
3460 | 3440 |
3461 static void | 3441 static void |
3462 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { | 3442 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { |
3463 UConverter *cnv = args->converter; | 3443 UConverter *cnv = args->converter; |
3464 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; | 3444 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; |
3465 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; | 3445 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; |
3466 char *p, *subchar; | 3446 char *p, *subchar; |
3467 char buffer[8]; | 3447 char buffer[8]; |
3468 int32_t length; | 3448 int32_t length; |
3469 | 3449 |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3651 #endif | 3631 #endif |
3652 | 3632 |
3653 cnvData = (UConverterDataISO2022*)cnv->extraInfo; | 3633 cnvData = (UConverterDataISO2022*)cnv->extraInfo; |
3654 | 3634 |
3655 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ | 3635 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ |
3656 switch(cnvData->locale[0]){ | 3636 switch(cnvData->locale[0]){ |
3657 case 'j': | 3637 case 'j': |
3658 /* include JIS X 0201 which is hardcoded */ | 3638 /* include JIS X 0201 which is hardcoded */ |
3659 sa->add(sa->set, 0xa5); | 3639 sa->add(sa->set, 0xa5); |
3660 sa->add(sa->set, 0x203e); | 3640 sa->add(sa->set, 0x203e); |
3661 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
3662 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | 3641 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
3663 /* include Latin-1 for some variants of JP */ | 3642 /* include Latin-1 for some variants of JP */ |
3664 sa->addRange(sa->set, 0, 0xff); | 3643 sa->addRange(sa->set, 0, 0xff); |
3665 } else { | 3644 } else { |
3666 /* include ASCII for JP */ | 3645 /* include ASCII for JP */ |
3667 sa->addRange(sa->set, 0, 0x7f); | 3646 sa->addRange(sa->set, 0, 0x7f); |
3668 } | 3647 } |
3669 #else | |
3670 /* include ASCII for JP */ | |
3671 sa->addRange(sa->set, 0, 0x7f); | |
3672 #endif | |
3673 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { | 3648 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { |
3674 /* | 3649 /* |
3675 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 | 3650 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 |
3676 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) | 3651 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) |
3677 * use half-width Katakana. | 3652 * use half-width Katakana. |
3678 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) | 3653 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) |
3679 * half-width Katakana via the ESC ( I sequence. | 3654 * half-width Katakana via the ESC ( I sequence. |
3680 * However, we only emit (fromUnicode) half-width Katakana according
to the | 3655 * However, we only emit (fromUnicode) half-width Katakana according
to the |
3681 * definition of each variant. | 3656 * definition of each variant. |
3682 * | 3657 * |
3683 * When including fallbacks, | 3658 * When including fallbacks, |
3684 * we need to include half-width Katakana Unicode code points for al
l JP variants because | 3659 * we need to include half-width Katakana Unicode code points for al
l JP variants because |
3685 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). | 3660 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). |
3686 */ | 3661 */ |
3687 /* include half-width Katakana for JP */ | 3662 /* include half-width Katakana for JP */ |
3688 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | 3663 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
3689 } | 3664 } |
3690 break; | 3665 break; |
3691 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 3666 #if !UCONFIG_ONLY_HTML_CONVERSION |
3692 case 'c': | 3667 case 'c': |
3693 case 'z': | 3668 case 'z': |
3694 /* include ASCII for CN */ | 3669 /* include ASCII for CN */ |
3695 sa->addRange(sa->set, 0, 0x7f); | 3670 sa->addRange(sa->set, 0, 0x7f); |
3696 break; | 3671 break; |
3697 case 'k': | 3672 case 'k': |
3698 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ | 3673 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ |
3699 cnvData->currentConverter->sharedData->impl->getUnicodeSet( | 3674 cnvData->currentConverter->sharedData->impl->getUnicodeSet( |
3700 cnvData->currentConverter, sa, which, pErrorCode); | 3675 cnvData->currentConverter, sa, which, pErrorCode); |
3701 /* the loop over myConverterArray[] will simply not find another convert
er */ | 3676 /* the loop over myConverterArray[] will simply not find another convert
er */ |
(...skipping 18 matching lines...) Expand all Loading... |
3720 | 3695 |
3721 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { | 3696 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
3722 UConverterSetFilter filter; | 3697 UConverterSetFilter filter; |
3723 if(cnvData->myConverterArray[i]!=NULL) { | 3698 if(cnvData->myConverterArray[i]!=NULL) { |
3724 if(cnvData->locale[0]=='j' && i==JISX208) { | 3699 if(cnvData->locale[0]=='j' && i==JISX208) { |
3725 /* | 3700 /* |
3726 * Only add code points that map to Shift-JIS codes | 3701 * Only add code points that map to Shift-JIS codes |
3727 * corresponding to JIS X 0208. | 3702 * corresponding to JIS X 0208. |
3728 */ | 3703 */ |
3729 filter=UCNV_SET_FILTER_SJIS; | 3704 filter=UCNV_SET_FILTER_SJIS; |
3730 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 3705 #if !UCONFIG_ONLY_HTML_CONVERSION |
3731 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 3706 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
3732 cnvData->version==0 && i==CNS_11643) { | 3707 cnvData->version==0 && i==CNS_11643) { |
3733 /* | 3708 /* |
3734 * Version-specific for CN: | 3709 * Version-specific for CN: |
3735 * CN version 0 does not map CNS planes 3..7 although | 3710 * CN version 0 does not map CNS planes 3..7 although |
3736 * they are all available in the CNS conversion table; | 3711 * they are all available in the CNS conversion table; |
3737 * CN version 1 (-EXT) does map them all. | 3712 * CN version 1 (-EXT) does map them all. |
3738 * The two versions create different Unicode sets. | 3713 * The two versions create different Unicode sets. |
3739 */ | 3714 */ |
3740 filter=UCNV_SET_FILTER_2022_CN; | 3715 filter=UCNV_SET_FILTER_2022_CN; |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3859 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ | 3834 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
3860 }; | 3835 }; |
3861 | 3836 |
3862 namespace { | 3837 namespace { |
3863 | 3838 |
3864 const UConverterSharedData _ISO2022JPData= | 3839 const UConverterSharedData _ISO2022JPData= |
3865 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022J
PImpl); | 3840 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022J
PImpl); |
3866 | 3841 |
3867 } // namespace | 3842 } // namespace |
3868 | 3843 |
3869 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 3844 #if !UCONFIG_ONLY_HTML_CONVERSION |
3870 /************* KR ***************/ | 3845 /************* KR ***************/ |
3871 static const UConverterImpl _ISO2022KRImpl={ | 3846 static const UConverterImpl _ISO2022KRImpl={ |
3872 UCNV_ISO_2022, | 3847 UCNV_ISO_2022, |
3873 | 3848 |
3874 NULL, | 3849 NULL, |
3875 NULL, | 3850 NULL, |
3876 | 3851 |
3877 _ISO2022Open, | 3852 _ISO2022Open, |
3878 _ISO2022Close, | 3853 _ISO2022Close, |
3879 _ISO2022Reset, | 3854 _ISO2022Reset, |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3960 0, | 3935 0, |
3961 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ | 3936 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
3962 }; | 3937 }; |
3963 | 3938 |
3964 namespace { | 3939 namespace { |
3965 | 3940 |
3966 const UConverterSharedData _ISO2022CNData= | 3941 const UConverterSharedData _ISO2022CNData= |
3967 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022C
NImpl); | 3942 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022C
NImpl); |
3968 | 3943 |
3969 } // namespace | 3944 } // namespace |
3970 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 3945 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
3971 | 3946 |
3972 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 3947 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
OLD | NEW |