OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2000-2012, International Business Machines | 3 * Copyright (C) 2000-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * file name: ucnv2022.cpp | 6 * file name: ucnv2022.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
11 * created on: 2000feb03 | 11 * created on: 2000feb03 |
12 * created by: Markus W. Scherer | 12 * created by: Markus W. Scherer |
13 * | 13 * |
(...skipping 22 matching lines...) Expand all Loading... |
36 #include "unicode/ucnv_cb.h" | 36 #include "unicode/ucnv_cb.h" |
37 #include "unicode/utf16.h" | 37 #include "unicode/utf16.h" |
38 #include "ucnv_imp.h" | 38 #include "ucnv_imp.h" |
39 #include "ucnv_bld.h" | 39 #include "ucnv_bld.h" |
40 #include "ucnv_cnv.h" | 40 #include "ucnv_cnv.h" |
41 #include "ucnvmbcs.h" | 41 #include "ucnvmbcs.h" |
42 #include "cstring.h" | 42 #include "cstring.h" |
43 #include "cmemory.h" | 43 #include "cmemory.h" |
44 #include "uassert.h" | 44 #include "uassert.h" |
45 | 45 |
46 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
47 | |
48 #ifdef U_ENABLE_GENERIC_ISO_2022 | 46 #ifdef U_ENABLE_GENERIC_ISO_2022 |
49 /* | 47 /* |
50 * I am disabling the generic ISO-2022 converter after proposing to do so on | 48 * I am disabling the generic ISO-2022 converter after proposing to do so on |
51 * the icu mailing list two days ago. | 49 * the icu mailing list two days ago. |
52 * | 50 * |
53 * Reasons: | 51 * Reasons: |
54 * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of | 52 * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of |
55 * its designation sequences, single shifts with return to the previous state, | 53 * its designation sequences, single shifts with return to the previous state, |
56 * switch-with-no-return to UTF-16BE or similar, etc. | 54 * switch-with-no-return to UTF-16BE or similar, etc. |
57 * This is unlike the language-specific variants like ISO-2022-JP which | 55 * This is unlike the language-specific variants like ISO-2022-JP which |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
147 CNS_11643_1, | 145 CNS_11643_1, |
148 CNS_11643_2, | 146 CNS_11643_2, |
149 CNS_11643_3, | 147 CNS_11643_3, |
150 CNS_11643_4, | 148 CNS_11643_4, |
151 CNS_11643_5, | 149 CNS_11643_5, |
152 CNS_11643_6, | 150 CNS_11643_6, |
153 CNS_11643_7 | 151 CNS_11643_7 |
154 } StateEnum; | 152 } StateEnum; |
155 | 153 |
156 /* is the StateEnum charset value for a DBCS charset? */ | 154 /* is the StateEnum charset value for a DBCS charset? */ |
157 #if UCONFIG_NO_NON_HTML5_CONVERSION | |
158 #define IS_JP_DBCS(cs) (JISX208==(cs)) | |
159 #else | |
160 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) | 155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
161 #endif | |
162 | 156 |
163 #define CSM(cs) ((uint16_t)1<<(cs)) | 157 #define CSM(cs) ((uint16_t)1<<(cs)) |
164 | 158 |
165 /* | 159 /* |
166 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence | 160 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence |
167 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x | 161 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x |
168 * | 162 * |
169 * Note: The converter uses some leniency: | 163 * Note: The converter uses some leniency: |
170 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in | 164 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in |
171 * all versions, not just JIS7 and JIS8. | 165 * all versions, not just JIS7 and JIS8. |
172 * - ICU does not distinguish between different versions of JIS X 0208. | 166 * - ICU does not distinguish between different versions of JIS X 0208. |
173 */ | 167 */ |
174 #if UCONFIG_NO_NON_HTML5_CONVERSION | |
175 enum { MAX_JA_VERSION=0 }; | |
176 #else | |
177 enum { MAX_JA_VERSION=4 }; | 168 enum { MAX_JA_VERSION=4 }; |
178 #endif | |
179 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
180 /* | |
181 * TODO(jshin): The encoding spec has JISX212, but we don't support it. | |
182 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 | |
183 */ | |
184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 170 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
185 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 171 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 172 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
188 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 173 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
189 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 174 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
190 #endif | |
191 }; | 175 }; |
192 | 176 |
193 typedef enum { | 177 typedef enum { |
194 ASCII1=0, | 178 ASCII1=0, |
195 LATIN1, | 179 LATIN1, |
196 SBCS, | 180 SBCS, |
197 DBCS, | 181 DBCS, |
198 MBCS, | 182 MBCS, |
199 HWKANA | 183 HWKANA |
200 }Cnv2022Type; | 184 }Cnv2022Type; |
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
367 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_
2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 351 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_
2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
368 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 352 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
369 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 | 353 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 |
370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 354 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 355 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 356 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
373 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 357 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
374 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 | 358 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 |
375 }; | 359 }; |
376 | 360 |
| 361 |
377 /* Type def for refactoring changeState_2022 code*/ | 362 /* Type def for refactoring changeState_2022 code*/ |
378 typedef enum{ | 363 typedef enum{ |
379 #ifdef U_ENABLE_GENERIC_ISO_2022 | 364 #ifdef U_ENABLE_GENERIC_ISO_2022 |
380 ISO_2022=0, | 365 ISO_2022=0, |
381 #endif | 366 #endif |
382 #if UCONFIG_NO_NON_HTML5_CONVERSION | |
383 ISO_2022_JP=1 | |
384 #else | |
385 ISO_2022_JP=1, | 367 ISO_2022_JP=1, |
386 ISO_2022_KR=2, | 368 ISO_2022_KR=2, |
387 ISO_2022_CN=3 | 369 ISO_2022_CN=3 |
388 #endif | |
389 } Variant2022; | 370 } Variant2022; |
390 | 371 |
391 /*********** ISO 2022 Converter Protos ***********/ | 372 /*********** ISO 2022 Converter Protos ***********/ |
392 static void | 373 static void |
393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); | 374 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); |
394 | 375 |
395 static void | 376 static void |
396 _ISO2022Close(UConverter *converter); | 377 _ISO2022Close(UConverter *converter); |
397 | 378 |
398 static void | 379 static void |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
495 myConverterData->version = version; | 476 myConverterData->version = version; |
496 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && | 477 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && |
497 (myLocale[2]=='_' || myLocale[2]=='\0')) | 478 (myLocale[2]=='_' || myLocale[2]=='\0')) |
498 { | 479 { |
499 size_t len=0; | 480 size_t len=0; |
500 /* open the required converters and cache them */ | 481 /* open the required converters and cache them */ |
501 if(version>MAX_JA_VERSION) { | 482 if(version>MAX_JA_VERSION) { |
502 /* prevent indexing beyond jpCharsetMasks[] */ | 483 /* prevent indexing beyond jpCharsetMasks[] */ |
503 myConverterData->version = version = 0; | 484 myConverterData->version = version = 0; |
504 } | 485 } |
505 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
506 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | 486 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
507 myConverterData->myConverterArray[ISO8859_7] = | 487 myConverterData->myConverterArray[ISO8859_7] = |
508 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); | 488 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); |
509 } | 489 } |
510 #endif | |
511 myConverterData->myConverterArray[JISX208] = | 490 myConverterData->myConverterArray[JISX208] = |
512 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); | 491 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); |
513 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
514 if(jpCharsetMasks[version]&CSM(JISX212)) { | 492 if(jpCharsetMasks[version]&CSM(JISX212)) { |
515 myConverterData->myConverterArray[JISX212] = | 493 myConverterData->myConverterArray[JISX212] = |
516 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); | 494 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); |
517 } | 495 } |
518 if(jpCharsetMasks[version]&CSM(GB2312)) { | 496 if(jpCharsetMasks[version]&CSM(GB2312)) { |
519 myConverterData->myConverterArray[GB2312] = | 497 myConverterData->myConverterArray[GB2312] = |
520 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ | 498 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ |
521 } | 499 } |
522 if(jpCharsetMasks[version]&CSM(KSC5601)) { | 500 if(jpCharsetMasks[version]&CSM(KSC5601)) { |
523 myConverterData->myConverterArray[KSC5601] = | 501 myConverterData->myConverterArray[KSC5601] = |
524 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); | 502 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); |
525 } | 503 } |
526 #endif | |
527 | 504 |
528 /* set the function pointers to appropriate functions */ | 505 /* set the function pointers to appropriate functions */ |
529 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 506 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
530 uprv_strcpy(myConverterData->locale,"ja"); | 507 uprv_strcpy(myConverterData->locale,"ja"); |
531 | 508 |
532 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); | 509 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); |
533 len = uprv_strlen(myConverterData->name); | 510 len = uprv_strlen(myConverterData->name); |
534 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; | 511 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; |
535 myConverterData->name[len+1]='\0'; | 512 myConverterData->name[len+1]='\0'; |
536 } | 513 } |
537 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
538 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 514 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
539 (myLocale[2]=='_' || myLocale[2]=='\0')) | 515 (myLocale[2]=='_' || myLocale[2]=='\0')) |
540 { | 516 { |
541 const char *cnvName; | 517 const char *cnvName; |
542 if(version==1) { | 518 if(version==1) { |
543 cnvName="icu-internal-25546"; | 519 cnvName="icu-internal-25546"; |
544 } else { | 520 } else { |
545 cnvName="ibm-949"; | 521 cnvName="ibm-949"; |
546 myConverterData->version=version=0; | 522 myConverterData->version=version=0; |
547 } | 523 } |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
597 myConverterData->version = 0; | 573 myConverterData->version = 0; |
598 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); | 574 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); |
599 }else if (version==1){ | 575 }else if (version==1){ |
600 myConverterData->version = 1; | 576 myConverterData->version = 1; |
601 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); | 577 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); |
602 }else { | 578 }else { |
603 myConverterData->version = 2; | 579 myConverterData->version = 2; |
604 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); | 580 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); |
605 } | 581 } |
606 } | 582 } |
607 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION | |
608 else{ | 583 else{ |
609 #ifdef U_ENABLE_GENERIC_ISO_2022 | 584 #ifdef U_ENABLE_GENERIC_ISO_2022 |
610 myConverterData->isFirstBuffer = TRUE; | 585 myConverterData->isFirstBuffer = TRUE; |
611 | 586 |
612 /* append the UTF-8 escape sequence */ | 587 /* append the UTF-8 escape sequence */ |
613 cnv->charErrorBufferLength = 3; | 588 cnv->charErrorBufferLength = 3; |
614 cnv->charErrorBuffer[0] = 0x1b; | 589 cnv->charErrorBuffer[0] = 0x1b; |
615 cnv->charErrorBuffer[1] = 0x25; | 590 cnv->charErrorBuffer[1] = 0x25; |
616 cnv->charErrorBuffer[2] = 0x42; | 591 cnv->charErrorBuffer[2] = 0x42; |
617 | 592 |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
732 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 707 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
733 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE | 708 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE |
734 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 709 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
735 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE | 710 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE |
736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 711 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 712 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
738 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 713 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
739 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 714 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
740 }; | 715 }; |
741 | 716 |
742 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
743 /*************** to unicode *******************/ | 717 /*************** to unicode *******************/ |
744 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | 718 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
745 /* 0 1 2 3 4
5 6 7 8 9 */ | 719 /* 0 1 2 3 4
5 6 7 8 9 */ |
746 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 720 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 721 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 722 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
749 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 723 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 | 724 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 |
751 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 725 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
752 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 726 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
753 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 727 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
754 }; | 728 }; |
755 #endif | |
756 | 729 |
757 | 730 |
758 static UCNV_TableStates_2022 | 731 static UCNV_TableStates_2022 |
759 getKey_2022(char c,int32_t* key,int32_t* offset){ | 732 getKey_2022(char c,int32_t* key,int32_t* offset){ |
760 int32_t togo; | 733 int32_t togo; |
761 int32_t low = 0; | 734 int32_t low = 0; |
762 int32_t hi = MAX_STATES_2022; | 735 int32_t hi = MAX_STATES_2022; |
763 int32_t oldmid=0; | 736 int32_t oldmid=0; |
764 | 737 |
765 togo = normalize_esq_chars_2022[(uint8_t)c]; | 738 togo = normalize_esq_chars_2022[(uint8_t)c]; |
766 if(togo == 0) { | 739 if(togo == 0) { |
767 /* not a valid character anywhere in an escape sequence */ | 740 /* not a valid character anywhere in an escape sequence */ |
768 *key = 0; | 741 *key = 0; |
769 *offset = 0; | 742 *offset = 0; |
770 return INVALID_2022; | 743 return INVALID_2022; |
771 } | 744 } |
772 togo = (*key << 5) + togo; | 745 togo = (*key << 5) + togo; |
773 | 746 |
774 while (hi != low) /*binary search*/{ | 747 while (hi != low) /*binary search*/{ |
775 | 748 |
776 register int32_t mid = (hi+low) >> 1; /*Finds median*/ | 749 int32_t mid = (hi+low) >> 1; /*Finds median*/ |
777 | 750 |
778 if (mid == oldmid) | 751 if (mid == oldmid) |
779 break; | 752 break; |
780 | 753 |
781 if (escSeqStateTable_Key_2022[mid] > togo){ | 754 if (escSeqStateTable_Key_2022[mid] > togo){ |
782 hi = mid; | 755 hi = mid; |
783 } | 756 } |
784 else if (escSeqStateTable_Key_2022[mid] < togo){ | 757 else if (escSeqStateTable_Key_2022[mid] < togo){ |
785 low = mid; | 758 low = mid; |
786 } | 759 } |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
898 if(myData2022->toU2022State.g<2) { | 871 if(myData2022->toU2022State.g<2) { |
899 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; | 872 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; |
900 } | 873 } |
901 myData2022->toU2022State.g=2; | 874 myData2022->toU2022State.g=2; |
902 } else { | 875 } else { |
903 /* illegal to have SS2 before a matching designator */ | 876 /* illegal to have SS2 before a matching designator */ |
904 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 877 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
905 } | 878 } |
906 break; | 879 break; |
907 /* case SS3_STATE: not used in ISO-2022-JP-x */ | 880 /* case SS3_STATE: not used in ISO-2022-JP-x */ |
908 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
909 case ISO8859_1: | 881 case ISO8859_1: |
910 case ISO8859_7: | 882 case ISO8859_7: |
911 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 883 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
912 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 884 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
913 } else { | 885 } else { |
914 /* G2 charset for SS2 */ | 886 /* G2 charset for SS2 */ |
915 myData2022->toU2022State.cs[2]=(int8_t)tempState; | 887 myData2022->toU2022State.cs[2]=(int8_t)tempState; |
916 } | 888 } |
917 break; | 889 break; |
918 #endif | |
919 default: | 890 default: |
920 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 891 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
921 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 892 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
922 } else { | 893 } else { |
923 /* G0 charset */ | 894 /* G0 charset */ |
924 myData2022->toU2022State.cs[0]=(int8_t)tempState; | 895 myData2022->toU2022State.cs[0]=(int8_t)tempState; |
925 } | 896 } |
926 break; | 897 break; |
927 } | 898 } |
928 } | 899 } |
929 break; | 900 break; |
930 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
931 case ISO_2022_CN: | 901 case ISO_2022_CN: |
932 { | 902 { |
933 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; | 903 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
934 switch(tempState) { | 904 switch(tempState) { |
935 case INVALID_STATE: | 905 case INVALID_STATE: |
936 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 906 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
937 break; | 907 break; |
938 case SS2_STATE: | 908 case SS2_STATE: |
939 if(myData2022->toU2022State.cs[2]!=0) { | 909 if(myData2022->toU2022State.cs[2]!=0) { |
940 if(myData2022->toU2022State.g<2) { | 910 if(myData2022->toU2022State.g<2) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
982 } | 952 } |
983 } | 953 } |
984 break; | 954 break; |
985 case ISO_2022_KR: | 955 case ISO_2022_KR: |
986 if(offset==0x30){ | 956 if(offset==0x30){ |
987 /* nothing to be done, just accept this one escape sequence */ | 957 /* nothing to be done, just accept this one escape sequence */ |
988 } else { | 958 } else { |
989 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 959 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
990 } | 960 } |
991 break; | 961 break; |
992 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
993 | 962 |
994 default: | 963 default: |
995 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 964 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
996 break; | 965 break; |
997 } | 966 } |
998 } | 967 } |
999 if(U_SUCCESS(*err)) { | 968 if(U_SUCCESS(*err)) { |
1000 _this->toULength = 0; | 969 _this->toULength = 0; |
1001 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { | 970 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { |
1002 if(_this->toULength>1) { | 971 if(_this->toULength>1) { |
(...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1403 * KSC5601 : alias to ibm-949 mapping table | 1372 * KSC5601 : alias to ibm-949 mapping table |
1404 * GB2312 : alias to ibm-1386 mapping table | 1373 * GB2312 : alias to ibm-1386 mapping table |
1405 * ISO-8859-1 : Algorithmic implemented as LATIN1 case | 1374 * ISO-8859-1 : Algorithmic implemented as LATIN1 case |
1406 * ISO-8859-7 : alias to ibm-9409 mapping table | 1375 * ISO-8859-7 : alias to ibm-9409 mapping table |
1407 */ | 1376 */ |
1408 | 1377 |
1409 /* preference order of JP charsets */ | 1378 /* preference order of JP charsets */ |
1410 static const StateEnum jpCharsetPref[]={ | 1379 static const StateEnum jpCharsetPref[]={ |
1411 ASCII, | 1380 ASCII, |
1412 JISX201, | 1381 JISX201, |
1413 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1414 ISO8859_1, | 1382 ISO8859_1, |
1415 ISO8859_7, | 1383 ISO8859_7, |
1416 #endif | |
1417 JISX208, | 1384 JISX208, |
1418 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1419 JISX212, | 1385 JISX212, |
1420 GB2312, | 1386 GB2312, |
1421 KSC5601, | 1387 KSC5601, |
1422 #endif | |
1423 HWKANA_7BIT | 1388 HWKANA_7BIT |
1424 }; | 1389 }; |
1425 | 1390 |
1426 /* | 1391 /* |
1427 * The escape sequences must be in order of the enum constants like JISX201 = 3, | 1392 * The escape sequences must be in order of the enum constants like JISX201 = 3, |
1428 * not in order of jpCharsetPref[]! | 1393 * not in order of jpCharsetPref[]! |
1429 */ | 1394 */ |
1430 static const char escSeqChars[][6] ={ | 1395 static const char escSeqChars[][6] ={ |
1431 "\x1B\x28\x42", /* <ESC>(B ASCII */ | 1396 "\x1B\x28\x42", /* <ESC>(B ASCII */ |
1432 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ | 1397 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ |
(...skipping 312 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1745 choices[choiceCount++] = cs = pFromU2022State->cs[0]; | 1710 choices[choiceCount++] = cs = pFromU2022State->cs[0]; |
1746 csm &= ~CSM(cs); | 1711 csm &= ~CSM(cs); |
1747 | 1712 |
1748 /* try the current G2 charset */ | 1713 /* try the current G2 charset */ |
1749 if((cs = pFromU2022State->cs[2]) != 0) { | 1714 if((cs = pFromU2022State->cs[2]) != 0) { |
1750 choices[choiceCount++] = cs; | 1715 choices[choiceCount++] = cs; |
1751 csm &= ~CSM(cs); | 1716 csm &= ~CSM(cs); |
1752 } | 1717 } |
1753 | 1718 |
1754 /* try all the other possible charsets */ | 1719 /* try all the other possible charsets */ |
1755 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) { | 1720 for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) { |
1756 cs = (int8_t)jpCharsetPref[i]; | 1721 cs = (int8_t)jpCharsetPref[i]; |
1757 if(CSM(cs) & csm) { | 1722 if(CSM(cs) & csm) { |
1758 choices[choiceCount++] = cs; | 1723 choices[choiceCount++] = cs; |
1759 csm &= ~CSM(cs); | 1724 csm &= ~CSM(cs); |
1760 } | 1725 } |
1761 } | 1726 } |
1762 } | 1727 } |
1763 | 1728 |
1764 cs = g = 0; | 1729 cs = g = 0; |
1765 /* | 1730 /* |
(...skipping 16 matching lines...) Expand all Loading... |
1782 int8_t cs0 = choices[i]; | 1747 int8_t cs0 = choices[i]; |
1783 switch(cs0) { | 1748 switch(cs0) { |
1784 case ASCII: | 1749 case ASCII: |
1785 if(sourceChar <= 0x7f) { | 1750 if(sourceChar <= 0x7f) { |
1786 targetValue = (uint32_t)sourceChar; | 1751 targetValue = (uint32_t)sourceChar; |
1787 len = 1; | 1752 len = 1; |
1788 cs = cs0; | 1753 cs = cs0; |
1789 g = 0; | 1754 g = 0; |
1790 } | 1755 } |
1791 break; | 1756 break; |
1792 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1793 case ISO8859_1: | 1757 case ISO8859_1: |
1794 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { | 1758 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
1795 targetValue = (uint32_t)sourceChar - 0x80; | 1759 targetValue = (uint32_t)sourceChar - 0x80; |
1796 len = 1; | 1760 len = 1; |
1797 cs = cs0; | 1761 cs = cs0; |
1798 g = 2; | 1762 g = 2; |
1799 } | 1763 } |
1800 break; | 1764 break; |
1801 #endif | |
1802 case HWKANA_7BIT: | 1765 case HWKANA_7BIT: |
1803 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { | 1766 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { |
1804 if(converterData->version==3) { | 1767 if(converterData->version==3) { |
1805 /* JIS7: use G1 (SO) */ | 1768 /* JIS7: use G1 (SO) */ |
1806 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ | 1769 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ |
1807 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); | 1770 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); |
1808 len = 1; | 1771 len = 1; |
1809 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ | 1772 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ |
1810 g = 1; | 1773 g = 1; |
1811 } else if(converterData->version==4) { | 1774 } else if(converterData->version==4) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1853 } | 1816 } |
1854 } else if(len == 0 && useFallback && | 1817 } else if(len == 0 && useFallback && |
1855 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { | 1818 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { |
1856 targetValue = hwkana_fb[sourceChar - HWKANA_START]; | 1819 targetValue = hwkana_fb[sourceChar - HWKANA_START]; |
1857 len = -2; | 1820 len = -2; |
1858 cs = cs0; | 1821 cs = cs0; |
1859 g = 0; | 1822 g = 0; |
1860 useFallback = FALSE; | 1823 useFallback = FALSE; |
1861 } | 1824 } |
1862 break; | 1825 break; |
1863 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1864 case ISO8859_7: | 1826 case ISO8859_7: |
1865 /* G0 SBCS forced to 7-bit output */ | 1827 /* G0 SBCS forced to 7-bit output */ |
1866 len2 = MBCS_SINGLE_FROM_UCHAR32( | 1828 len2 = MBCS_SINGLE_FROM_UCHAR32( |
1867 converterData->myConverterArray[cs0], | 1829 converterData->myConverterArray[cs0], |
1868 sourceChar, &value, | 1830 sourceChar, &value, |
1869 useFallback); | 1831 useFallback); |
1870 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { | 1832 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { |
1871 targetValue = value - 0x80; | 1833 targetValue = value - 0x80; |
1872 len = len2; | 1834 len = len2; |
1873 cs = cs0; | 1835 cs = cs0; |
1874 g = 2; | 1836 g = 2; |
1875 useFallback = FALSE; | 1837 useFallback = FALSE; |
1876 } | 1838 } |
1877 break; | 1839 break; |
1878 #endif | |
1879 default: | 1840 default: |
1880 /* G0 DBCS */ | 1841 /* G0 DBCS */ |
1881 len2 = MBCS_FROM_UCHAR32_ISO2022( | 1842 len2 = MBCS_FROM_UCHAR32_ISO2022( |
1882 converterData->myConverterArray[cs0], | 1843 converterData->myConverterArray[cs0], |
1883 sourceChar, &value, | 1844 sourceChar, &value, |
1884 useFallback, MBCS_OUTPUT_2); | 1845 useFallback, MBCS_OUTPUT_2); |
1885 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ | 1846 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ |
1886 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
1887 if(cs0 == KSC5601) { | 1847 if(cs0 == KSC5601) { |
1888 /* | 1848 /* |
1889 * Check for valid bytes for the encoding scheme. | 1849 * Check for valid bytes for the encoding scheme. |
1890 * This is necessary because the sub-converter (wind
ows-949) | 1850 * This is necessary because the sub-converter (wind
ows-949) |
1891 * has a broader encoding scheme than is valid for 2
022. | 1851 * has a broader encoding scheme than is valid for 2
022. |
1892 */ | 1852 */ |
1893 value = _2022FromGR94DBCS(value); | 1853 value = _2022FromGR94DBCS(value); |
1894 if(value == 0) { | 1854 if(value == 0) { |
1895 break; | 1855 break; |
1896 } | 1856 } |
1897 } | 1857 } |
1898 #endif | |
1899 targetValue = value; | 1858 targetValue = value; |
1900 len = len2; | 1859 len = len2; |
1901 cs = cs0; | 1860 cs = cs0; |
1902 g = 0; | 1861 g = 0; |
1903 useFallback = FALSE; | 1862 useFallback = FALSE; |
1904 } | 1863 } |
1905 break; | 1864 break; |
1906 } | 1865 } |
1907 } | 1866 } |
1908 | 1867 |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2182 /* return from a single-shift state to the previous one */ | 2141 /* return from a single-shift state to the previous one */ |
2183 if(pToU2022State->g >= 2) { | 2142 if(pToU2022State->g >= 2) { |
2184 pToU2022State->g=pToU2022State->prevG; | 2143 pToU2022State->g=pToU2022State->prevG; |
2185 } | 2144 } |
2186 } else switch(cs) { | 2145 } else switch(cs) { |
2187 case ASCII: | 2146 case ASCII: |
2188 if(mySourceChar <= 0x7f) { | 2147 if(mySourceChar <= 0x7f) { |
2189 targetUniChar = mySourceChar; | 2148 targetUniChar = mySourceChar; |
2190 } | 2149 } |
2191 break; | 2150 break; |
2192 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
2193 case ISO8859_1: | 2151 case ISO8859_1: |
2194 if(mySourceChar <= 0x7f) { | 2152 if(mySourceChar <= 0x7f) { |
2195 targetUniChar = mySourceChar + 0x80; | 2153 targetUniChar = mySourceChar + 0x80; |
2196 } | 2154 } |
2197 /* return from a single-shift state to the previous one */ | 2155 /* return from a single-shift state to the previous one */ |
2198 pToU2022State->g=pToU2022State->prevG; | 2156 pToU2022State->g=pToU2022State->prevG; |
2199 break; | 2157 break; |
2200 case ISO8859_7: | 2158 case ISO8859_7: |
2201 if(mySourceChar <= 0x7f) { | 2159 if(mySourceChar <= 0x7f) { |
2202 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ | 2160 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ |
2203 targetUniChar = | 2161 targetUniChar = |
2204 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( | 2162 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( |
2205 myData->myConverterArray[cs], | 2163 myData->myConverterArray[cs], |
2206 mySourceChar + 0x80); | 2164 mySourceChar + 0x80); |
2207 } | 2165 } |
2208 /* return from a single-shift state to the previous one */ | 2166 /* return from a single-shift state to the previous one */ |
2209 pToU2022State->g=pToU2022State->prevG; | 2167 pToU2022State->g=pToU2022State->prevG; |
2210 break; | 2168 break; |
2211 #endif | |
2212 case JISX201: | 2169 case JISX201: |
2213 if(mySourceChar <= 0x7f) { | 2170 if(mySourceChar <= 0x7f) { |
2214 targetUniChar = jisx201ToU(mySourceChar); | 2171 targetUniChar = jisx201ToU(mySourceChar); |
2215 } | 2172 } |
2216 break; | 2173 break; |
2217 case HWKANA_7BIT: | 2174 case HWKANA_7BIT: |
2218 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { | 2175 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { |
2219 /* 7-bit halfwidth Katakana */ | 2176 /* 7-bit halfwidth Katakana */ |
2220 targetUniChar = mySourceChar + (HWKANA_START - 0x21); | 2177 targetUniChar = mySourceChar + (HWKANA_START - 0x21); |
2221 } | 2178 } |
(...skipping 19 matching lines...) Expand all Loading... |
2241 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; | 2198 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; |
2242 if (leadIsOk && trailIsOk) { | 2199 if (leadIsOk && trailIsOk) { |
2243 ++mySource; | 2200 ++mySource; |
2244 tmpSourceChar = (mySourceChar << 8) | trailByte; | 2201 tmpSourceChar = (mySourceChar << 8) | trailByte; |
2245 if(cs == JISX208) { | 2202 if(cs == JISX208) { |
2246 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); | 2203 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); |
2247 mySourceChar = tmpSourceChar; | 2204 mySourceChar = tmpSourceChar; |
2248 } else { | 2205 } else { |
2249 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ | 2206 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ |
2250 mySourceChar = tmpSourceChar; | 2207 mySourceChar = tmpSourceChar; |
2251 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
2252 if (cs == KSC5601) { | 2208 if (cs == KSC5601) { |
2253 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ | 2209 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ |
2254 } | 2210 } |
2255 #endif | |
2256 tempBuf[0] = (char)(tmpSourceChar >> 8); | 2211 tempBuf[0] = (char)(tmpSourceChar >> 8); |
2257 tempBuf[1] = (char)(tmpSourceChar); | 2212 tempBuf[1] = (char)(tmpSourceChar); |
2258 } | 2213 } |
2259 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); | 2214 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); |
2260 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { | 2215 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { |
2261 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ | 2216 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ |
2262 ++mySource; | 2217 ++mySource; |
2263 /* add another bit so that the code below writes 2 b
ytes in case of error */ | 2218 /* add another bit so that the code below writes 2 b
ytes in case of error */ |
2264 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; | 2219 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; |
2265 } | 2220 } |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2307 *err =U_BUFFER_OVERFLOW_ERROR; | 2262 *err =U_BUFFER_OVERFLOW_ERROR; |
2308 break; | 2263 break; |
2309 } | 2264 } |
2310 } | 2265 } |
2311 endloop: | 2266 endloop: |
2312 args->target = myTarget; | 2267 args->target = myTarget; |
2313 args->source = mySource; | 2268 args->source = mySource; |
2314 } | 2269 } |
2315 | 2270 |
2316 | 2271 |
2317 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
2318 /*************************************************************** | 2272 /*************************************************************** |
2319 * Rules for ISO-2022-KR encoding | 2273 * Rules for ISO-2022-KR encoding |
2320 * i) The KSC5601 designator sequence should appear only once in a file, | 2274 * i) The KSC5601 designator sequence should appear only once in a file, |
2321 * at the beginning of a line before any KSC5601 characters. This usually | 2275 * at the beginning of a line before any KSC5601 characters. This usually |
2322 * means that it appears by itself on the first line of the file | 2276 * means that it appears by itself on the first line of the file |
2323 * ii) There are only 2 shifting sequences SO to shift into double byte mode | 2277 * ii) There are only 2 shifting sequences SO to shift into double byte mode |
2324 * and SI to shift into single byte mode | 2278 * and SI to shift into single byte mode |
2325 */ | 2279 */ |
2326 static void | 2280 static void |
2327 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ | 2281 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ |
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3451 } | 3405 } |
3452 else{ | 3406 else{ |
3453 *err =U_BUFFER_OVERFLOW_ERROR; | 3407 *err =U_BUFFER_OVERFLOW_ERROR; |
3454 break; | 3408 break; |
3455 } | 3409 } |
3456 } | 3410 } |
3457 endloop: | 3411 endloop: |
3458 args->target = myTarget; | 3412 args->target = myTarget; |
3459 args->source = mySource; | 3413 args->source = mySource; |
3460 } | 3414 } |
3461 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
3462 | 3415 |
3463 static void | 3416 static void |
3464 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { | 3417 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { |
3465 UConverter *cnv = args->converter; | 3418 UConverter *cnv = args->converter; |
3466 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; | 3419 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; |
3467 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; | 3420 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; |
3468 char *p, *subchar; | 3421 char *p, *subchar; |
3469 char buffer[8]; | 3422 char buffer[8]; |
3470 int32_t length; | 3423 int32_t length; |
3471 | 3424 |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3653 #endif | 3606 #endif |
3654 | 3607 |
3655 cnvData = (UConverterDataISO2022*)cnv->extraInfo; | 3608 cnvData = (UConverterDataISO2022*)cnv->extraInfo; |
3656 | 3609 |
3657 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ | 3610 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ |
3658 switch(cnvData->locale[0]){ | 3611 switch(cnvData->locale[0]){ |
3659 case 'j': | 3612 case 'j': |
3660 /* include JIS X 0201 which is hardcoded */ | 3613 /* include JIS X 0201 which is hardcoded */ |
3661 sa->add(sa->set, 0xa5); | 3614 sa->add(sa->set, 0xa5); |
3662 sa->add(sa->set, 0x203e); | 3615 sa->add(sa->set, 0x203e); |
3663 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
3664 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | 3616 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
3665 /* include Latin-1 for some variants of JP */ | 3617 /* include Latin-1 for some variants of JP */ |
3666 sa->addRange(sa->set, 0, 0xff); | 3618 sa->addRange(sa->set, 0, 0xff); |
3667 } else { | 3619 } else { |
3668 /* include ASCII for JP */ | 3620 /* include ASCII for JP */ |
3669 sa->addRange(sa->set, 0, 0x7f); | 3621 sa->addRange(sa->set, 0, 0x7f); |
3670 } | 3622 } |
3671 #else | |
3672 /* include ASCII for JP */ | |
3673 sa->addRange(sa->set, 0, 0x7f); | |
3674 #endif | |
3675 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { | 3623 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { |
3676 /* | 3624 /* |
3677 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 | 3625 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 |
3678 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) | 3626 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) |
3679 * use half-width Katakana. | 3627 * use half-width Katakana. |
3680 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) | 3628 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) |
3681 * half-width Katakana via the ESC ( I sequence. | 3629 * half-width Katakana via the ESC ( I sequence. |
3682 * However, we only emit (fromUnicode) half-width Katakana according
to the | 3630 * However, we only emit (fromUnicode) half-width Katakana according
to the |
3683 * definition of each variant. | 3631 * definition of each variant. |
3684 * | 3632 * |
3685 * When including fallbacks, | 3633 * When including fallbacks, |
3686 * we need to include half-width Katakana Unicode code points for al
l JP variants because | 3634 * we need to include half-width Katakana Unicode code points for al
l JP variants because |
3687 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). | 3635 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). |
3688 */ | 3636 */ |
3689 /* include half-width Katakana for JP */ | 3637 /* include half-width Katakana for JP */ |
3690 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | 3638 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
3691 } | 3639 } |
3692 break; | 3640 break; |
3693 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
3694 case 'c': | 3641 case 'c': |
3695 case 'z': | 3642 case 'z': |
3696 /* include ASCII for CN */ | 3643 /* include ASCII for CN */ |
3697 sa->addRange(sa->set, 0, 0x7f); | 3644 sa->addRange(sa->set, 0, 0x7f); |
3698 break; | 3645 break; |
3699 case 'k': | 3646 case 'k': |
3700 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ | 3647 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ |
3701 cnvData->currentConverter->sharedData->impl->getUnicodeSet( | 3648 cnvData->currentConverter->sharedData->impl->getUnicodeSet( |
3702 cnvData->currentConverter, sa, which, pErrorCode); | 3649 cnvData->currentConverter, sa, which, pErrorCode); |
3703 /* the loop over myConverterArray[] will simply not find another convert
er */ | 3650 /* the loop over myConverterArray[] will simply not find another convert
er */ |
3704 break; | 3651 break; |
3705 #endif | |
3706 default: | 3652 default: |
3707 break; | 3653 break; |
3708 } | 3654 } |
3709 | 3655 |
3710 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem
ent ucnv_getUnicodeSet() with reverse fallbacks. */ | 3656 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem
ent ucnv_getUnicodeSet() with reverse fallbacks. */ |
3711 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 3657 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
3712 cnvData->version==0 && i==CNS_11643 | 3658 cnvData->version==0 && i==CNS_11643 |
3713 ) { | 3659 ) { |
3714 /* special handling for non-EXT ISO-2022-CN: add only code point
s for CNS planes 1 and 2 */ | 3660 /* special handling for non-EXT ISO-2022-CN: add only code point
s for CNS planes 1 and 2 */ |
3715 ucnv_MBCSGetUnicodeSetForBytes( | 3661 ucnv_MBCSGetUnicodeSetForBytes( |
3716 cnvData->myConverterArray[i], | 3662 cnvData->myConverterArray[i], |
3717 sa, UCNV_ROUNDTRIP_SET, | 3663 sa, UCNV_ROUNDTRIP_SET, |
3718 0, 0x81, 0x82, | 3664 0, 0x81, 0x82, |
3719 pErrorCode); | 3665 pErrorCode); |
3720 } | 3666 } |
3721 #endif | 3667 #endif |
3722 | 3668 |
3723 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { | 3669 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
3724 UConverterSetFilter filter; | 3670 UConverterSetFilter filter; |
3725 if(cnvData->myConverterArray[i]!=NULL) { | 3671 if(cnvData->myConverterArray[i]!=NULL) { |
3726 if(cnvData->locale[0]=='j' && i==JISX208) { | 3672 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
3727 /* | 3673 cnvData->version==0 && i==CNS_11643 |
3728 * Only add code points that map to Shift-JIS codes | 3674 ) { |
3729 * corresponding to JIS X 0208. | |
3730 */ | |
3731 filter=UCNV_SET_FILTER_SJIS; | |
3732 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
3733 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | |
3734 cnvData->version==0 && i==CNS_11643) { | |
3735 /* | 3675 /* |
3736 * Version-specific for CN: | 3676 * Version-specific for CN: |
3737 * CN version 0 does not map CNS planes 3..7 although | 3677 * CN version 0 does not map CNS planes 3..7 although |
3738 * they are all available in the CNS conversion table; | 3678 * they are all available in the CNS conversion table; |
3739 * CN version 1 (-EXT) does map them all. | 3679 * CN version 1 (-EXT) does map them all. |
3740 * The two versions create different Unicode sets. | 3680 * The two versions create different Unicode sets. |
3741 */ | 3681 */ |
3742 filter=UCNV_SET_FILTER_2022_CN; | 3682 filter=UCNV_SET_FILTER_2022_CN; |
| 3683 } else if(cnvData->locale[0]=='j' && i==JISX208) { |
| 3684 /* |
| 3685 * Only add code points that map to Shift-JIS codes |
| 3686 * corresponding to JIS X 0208. |
| 3687 */ |
| 3688 filter=UCNV_SET_FILTER_SJIS; |
3743 } else if(i==KSC5601) { | 3689 } else if(i==KSC5601) { |
3744 /* | 3690 /* |
3745 * Some of the KSC 5601 tables (convrtrs.txt has this alias on
multiple tables) | 3691 * Some of the KSC 5601 tables (convrtrs.txt has this alias on
multiple tables) |
3746 * are broader than GR94. | 3692 * are broader than GR94. |
3747 */ | 3693 */ |
3748 filter=UCNV_SET_FILTER_GR94DBCS; | 3694 filter=UCNV_SET_FILTER_GR94DBCS; |
3749 #endif | |
3750 } else { | 3695 } else { |
3751 filter=UCNV_SET_FILTER_NONE; | 3696 filter=UCNV_SET_FILTER_NONE; |
3752 } | 3697 } |
3753 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); | 3698 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); |
3754 } | 3699 } |
3755 } | 3700 } |
3756 | 3701 |
3757 /* | 3702 /* |
3758 * ISO 2022 converters must not convert SO/SI/ESC despite what | 3703 * ISO 2022 converters must not convert SO/SI/ESC despite what |
3759 * sub-converters do by themselves. | 3704 * sub-converters do by themselves. |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3877 NULL, | 3822 NULL, |
3878 NULL, | 3823 NULL, |
3879 &_ISO2022JPStaticData, | 3824 &_ISO2022JPStaticData, |
3880 FALSE, | 3825 FALSE, |
3881 &_ISO2022JPImpl, | 3826 &_ISO2022JPImpl, |
3882 0, UCNV_MBCS_TABLE_INITIALIZER | 3827 0, UCNV_MBCS_TABLE_INITIALIZER |
3883 }; | 3828 }; |
3884 | 3829 |
3885 } // namespace | 3830 } // namespace |
3886 | 3831 |
3887 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
3888 /************* KR ***************/ | 3832 /************* KR ***************/ |
3889 static const UConverterImpl _ISO2022KRImpl={ | 3833 static const UConverterImpl _ISO2022KRImpl={ |
3890 UCNV_ISO_2022, | 3834 UCNV_ISO_2022, |
3891 | 3835 |
3892 NULL, | 3836 NULL, |
3893 NULL, | 3837 NULL, |
3894 | 3838 |
3895 _ISO2022Open, | 3839 _ISO2022Open, |
3896 _ISO2022Close, | 3840 _ISO2022Close, |
3897 _ISO2022Reset, | 3841 _ISO2022Reset, |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3994 ~((uint32_t) 0), | 3938 ~((uint32_t) 0), |
3995 NULL, | 3939 NULL, |
3996 NULL, | 3940 NULL, |
3997 &_ISO2022CNStaticData, | 3941 &_ISO2022CNStaticData, |
3998 FALSE, | 3942 FALSE, |
3999 &_ISO2022CNImpl, | 3943 &_ISO2022CNImpl, |
4000 0, UCNV_MBCS_TABLE_INITIALIZER | 3944 0, UCNV_MBCS_TABLE_INITIALIZER |
4001 }; | 3945 }; |
4002 | 3946 |
4003 } // namespace | 3947 } // namespace |
4004 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
4005 | 3948 |
4006 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 3949 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
OLD | NEW |