OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2000-2014, International Business Machines | 3 * Copyright (C) 2000-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * file name: ucnv2022.cpp | 6 * file name: ucnv2022.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
145 CNS_11643_1, | 145 CNS_11643_1, |
146 CNS_11643_2, | 146 CNS_11643_2, |
147 CNS_11643_3, | 147 CNS_11643_3, |
148 CNS_11643_4, | 148 CNS_11643_4, |
149 CNS_11643_5, | 149 CNS_11643_5, |
150 CNS_11643_6, | 150 CNS_11643_6, |
151 CNS_11643_7 | 151 CNS_11643_7 |
152 } StateEnum; | 152 } StateEnum; |
153 | 153 |
154 /* is the StateEnum charset value for a DBCS charset? */ | 154 /* is the StateEnum charset value for a DBCS charset? */ |
| 155 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 156 #define IS_JP_DBCS(cs) (JISX208==(cs)) |
| 157 #else |
155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) | 158 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
| 159 #endif |
156 | 160 |
157 #define CSM(cs) ((uint16_t)1<<(cs)) | 161 #define CSM(cs) ((uint16_t)1<<(cs)) |
158 | 162 |
159 /* | 163 /* |
160 * Each of these charset masks (with index x) contains a bit for a charset in ex
act correspondence | 164 * Each of these charset masks (with index x) contains a bit for a charset in ex
act correspondence |
161 * to whether that charset is used in the corresponding version x of ISO_2022,lo
cale=ja,version=x | 165 * to whether that charset is used in the corresponding version x of ISO_2022,lo
cale=ja,version=x |
162 * | 166 * |
163 * Note: The converter uses some leniency: | 167 * Note: The converter uses some leniency: |
164 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in | 168 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in |
165 * all versions, not just JIS7 and JIS8. | 169 * all versions, not just JIS7 and JIS8. |
166 * - ICU does not distinguish between different versions of JIS X 0208. | 170 * - ICU does not distinguish between different versions of JIS X 0208. |
167 */ | 171 */ |
| 172 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 173 enum { MAX_JA_VERSION=0 }; |
| 174 #else |
168 enum { MAX_JA_VERSION=4 }; | 175 enum { MAX_JA_VERSION=4 }; |
| 176 #endif |
169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 177 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
| 178 /* |
| 179 * TODO(jshin): The encoding spec has JISX212, but we don't support it. |
| 180 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 |
| 181 */ |
170 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
| 183 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
171 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
172 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
173 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
174 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
| 188 #endif |
175 }; | 189 }; |
176 | 190 |
177 typedef enum { | 191 typedef enum { |
178 ASCII1=0, | 192 ASCII1=0, |
179 LATIN1, | 193 LATIN1, |
180 SBCS, | 194 SBCS, |
181 DBCS, | 195 DBCS, |
182 MBCS, | 196 MBCS, |
183 HWKANA | 197 HWKANA |
184 }Cnv2022Type; | 198 }Cnv2022Type; |
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
351 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_
2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 365 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_
2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
352 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 366 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
353 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 | 367 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 |
354 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 368 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
355 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 369 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
356 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
357 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
358 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 | 372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 |
359 }; | 373 }; |
360 | 374 |
361 | |
362 /* Type def for refactoring changeState_2022 code*/ | 375 /* Type def for refactoring changeState_2022 code*/ |
363 typedef enum{ | 376 typedef enum{ |
364 #ifdef U_ENABLE_GENERIC_ISO_2022 | 377 #ifdef U_ENABLE_GENERIC_ISO_2022 |
365 ISO_2022=0, | 378 ISO_2022=0, |
366 #endif | 379 #endif |
| 380 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 381 ISO_2022_JP=1 |
| 382 #else |
367 ISO_2022_JP=1, | 383 ISO_2022_JP=1, |
368 ISO_2022_KR=2, | 384 ISO_2022_KR=2, |
369 ISO_2022_CN=3 | 385 ISO_2022_CN=3 |
| 386 #endif |
370 } Variant2022; | 387 } Variant2022; |
371 | 388 |
372 /*********** ISO 2022 Converter Protos ***********/ | 389 /*********** ISO 2022 Converter Protos ***********/ |
373 static void | 390 static void |
374 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); | 391 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); |
375 | 392 |
376 static void | 393 static void |
377 _ISO2022Close(UConverter *converter); | 394 _ISO2022Close(UConverter *converter); |
378 | 395 |
379 static void | 396 static void |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
476 myConverterData->version = version; | 493 myConverterData->version = version; |
477 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && | 494 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && |
478 (myLocale[2]=='_' || myLocale[2]=='\0')) | 495 (myLocale[2]=='_' || myLocale[2]=='\0')) |
479 { | 496 { |
480 size_t len=0; | 497 size_t len=0; |
481 /* open the required converters and cache them */ | 498 /* open the required converters and cache them */ |
482 if(version>MAX_JA_VERSION) { | 499 if(version>MAX_JA_VERSION) { |
483 /* prevent indexing beyond jpCharsetMasks[] */ | 500 /* prevent indexing beyond jpCharsetMasks[] */ |
484 myConverterData->version = version = 0; | 501 myConverterData->version = version = 0; |
485 } | 502 } |
| 503 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
486 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | 504 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
487 myConverterData->myConverterArray[ISO8859_7] = | 505 myConverterData->myConverterArray[ISO8859_7] = |
488 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); | 506 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); |
489 } | 507 } |
| 508 #endif |
490 myConverterData->myConverterArray[JISX208] = | 509 myConverterData->myConverterArray[JISX208] = |
491 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); | 510 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); |
| 511 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
492 if(jpCharsetMasks[version]&CSM(JISX212)) { | 512 if(jpCharsetMasks[version]&CSM(JISX212)) { |
493 myConverterData->myConverterArray[JISX212] = | 513 myConverterData->myConverterArray[JISX212] = |
494 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); | 514 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); |
495 } | 515 } |
496 if(jpCharsetMasks[version]&CSM(GB2312)) { | 516 if(jpCharsetMasks[version]&CSM(GB2312)) { |
497 myConverterData->myConverterArray[GB2312] = | 517 myConverterData->myConverterArray[GB2312] = |
498 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ | 518 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ |
499 } | 519 } |
500 if(jpCharsetMasks[version]&CSM(KSC5601)) { | 520 if(jpCharsetMasks[version]&CSM(KSC5601)) { |
501 myConverterData->myConverterArray[KSC5601] = | 521 myConverterData->myConverterArray[KSC5601] = |
502 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); | 522 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); |
503 } | 523 } |
| 524 #endif |
504 | 525 |
505 /* set the function pointers to appropriate functions */ | 526 /* set the function pointers to appropriate functions */ |
506 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 527 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
507 uprv_strcpy(myConverterData->locale,"ja"); | 528 uprv_strcpy(myConverterData->locale,"ja"); |
508 | 529 |
509 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); | 530 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); |
510 len = uprv_strlen(myConverterData->name); | 531 len = uprv_strlen(myConverterData->name); |
511 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; | 532 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; |
512 myConverterData->name[len+1]='\0'; | 533 myConverterData->name[len+1]='\0'; |
513 } | 534 } |
| 535 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
514 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 536 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
515 (myLocale[2]=='_' || myLocale[2]=='\0')) | 537 (myLocale[2]=='_' || myLocale[2]=='\0')) |
516 { | 538 { |
517 const char *cnvName; | 539 const char *cnvName; |
518 if(version==1) { | 540 if(version==1) { |
519 cnvName="icu-internal-25546"; | 541 cnvName="icu-internal-25546"; |
520 } else { | 542 } else { |
521 cnvName="ibm-949"; | 543 cnvName="ibm-949"; |
522 myConverterData->version=version=0; | 544 myConverterData->version=version=0; |
523 } | 545 } |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
573 myConverterData->version = 0; | 595 myConverterData->version = 0; |
574 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); | 596 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); |
575 }else if (version==1){ | 597 }else if (version==1){ |
576 myConverterData->version = 1; | 598 myConverterData->version = 1; |
577 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); | 599 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); |
578 }else { | 600 }else { |
579 myConverterData->version = 2; | 601 myConverterData->version = 2; |
580 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); | 602 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); |
581 } | 603 } |
582 } | 604 } |
| 605 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION |
583 else{ | 606 else{ |
584 #ifdef U_ENABLE_GENERIC_ISO_2022 | 607 #ifdef U_ENABLE_GENERIC_ISO_2022 |
585 myConverterData->isFirstBuffer = TRUE; | 608 myConverterData->isFirstBuffer = TRUE; |
586 | 609 |
587 /* append the UTF-8 escape sequence */ | 610 /* append the UTF-8 escape sequence */ |
588 cnv->charErrorBufferLength = 3; | 611 cnv->charErrorBufferLength = 3; |
589 cnv->charErrorBuffer[0] = 0x1b; | 612 cnv->charErrorBuffer[0] = 0x1b; |
590 cnv->charErrorBuffer[1] = 0x25; | 613 cnv->charErrorBuffer[1] = 0x25; |
591 cnv->charErrorBuffer[2] = 0x42; | 614 cnv->charErrorBuffer[2] = 0x42; |
592 | 615 |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
707 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 730 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
708 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE | 731 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE |
709 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 732 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
710 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE | 733 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE |
711 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 734 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
712 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 735 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
713 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
714 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
715 }; | 738 }; |
716 | 739 |
| 740 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
717 /*************** to unicode *******************/ | 741 /*************** to unicode *******************/ |
718 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | 742 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
719 /* 0 1 2 3 4
5 6 7 8 9 */ | 743 /* 0 1 2 3 4
5 6 7 8 9 */ |
720 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 744 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
721 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 745 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
722 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 746 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
723 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
724 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 | 748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 |
725 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 749 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
726 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
727 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 751 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
728 }; | 752 }; |
| 753 #endif |
729 | 754 |
730 | 755 |
731 static UCNV_TableStates_2022 | 756 static UCNV_TableStates_2022 |
732 getKey_2022(char c,int32_t* key,int32_t* offset){ | 757 getKey_2022(char c,int32_t* key,int32_t* offset){ |
733 int32_t togo; | 758 int32_t togo; |
734 int32_t low = 0; | 759 int32_t low = 0; |
735 int32_t hi = MAX_STATES_2022; | 760 int32_t hi = MAX_STATES_2022; |
736 int32_t oldmid=0; | 761 int32_t oldmid=0; |
737 | 762 |
738 togo = normalize_esq_chars_2022[(uint8_t)c]; | 763 togo = normalize_esq_chars_2022[(uint8_t)c]; |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
871 if(myData2022->toU2022State.g<2) { | 896 if(myData2022->toU2022State.g<2) { |
872 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; | 897 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; |
873 } | 898 } |
874 myData2022->toU2022State.g=2; | 899 myData2022->toU2022State.g=2; |
875 } else { | 900 } else { |
876 /* illegal to have SS2 before a matching designator */ | 901 /* illegal to have SS2 before a matching designator */ |
877 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 902 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
878 } | 903 } |
879 break; | 904 break; |
880 /* case SS3_STATE: not used in ISO-2022-JP-x */ | 905 /* case SS3_STATE: not used in ISO-2022-JP-x */ |
| 906 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
881 case ISO8859_1: | 907 case ISO8859_1: |
882 case ISO8859_7: | 908 case ISO8859_7: |
883 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 909 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
884 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 910 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
885 } else { | 911 } else { |
886 /* G2 charset for SS2 */ | 912 /* G2 charset for SS2 */ |
887 myData2022->toU2022State.cs[2]=(int8_t)tempState; | 913 myData2022->toU2022State.cs[2]=(int8_t)tempState; |
888 } | 914 } |
889 break; | 915 break; |
| 916 #endif |
890 default: | 917 default: |
891 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 918 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
892 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 919 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
893 } else { | 920 } else { |
894 /* G0 charset */ | 921 /* G0 charset */ |
895 myData2022->toU2022State.cs[0]=(int8_t)tempState; | 922 myData2022->toU2022State.cs[0]=(int8_t)tempState; |
896 } | 923 } |
897 break; | 924 break; |
898 } | 925 } |
899 } | 926 } |
900 break; | 927 break; |
| 928 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
901 case ISO_2022_CN: | 929 case ISO_2022_CN: |
902 { | 930 { |
903 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; | 931 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
904 switch(tempState) { | 932 switch(tempState) { |
905 case INVALID_STATE: | 933 case INVALID_STATE: |
906 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 934 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
907 break; | 935 break; |
908 case SS2_STATE: | 936 case SS2_STATE: |
909 if(myData2022->toU2022State.cs[2]!=0) { | 937 if(myData2022->toU2022State.cs[2]!=0) { |
910 if(myData2022->toU2022State.g<2) { | 938 if(myData2022->toU2022State.g<2) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
952 } | 980 } |
953 } | 981 } |
954 break; | 982 break; |
955 case ISO_2022_KR: | 983 case ISO_2022_KR: |
956 if(offset==0x30){ | 984 if(offset==0x30){ |
957 /* nothing to be done, just accept this one escape sequence */ | 985 /* nothing to be done, just accept this one escape sequence */ |
958 } else { | 986 } else { |
959 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 987 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
960 } | 988 } |
961 break; | 989 break; |
| 990 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
962 | 991 |
963 default: | 992 default: |
964 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 993 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
965 break; | 994 break; |
966 } | 995 } |
967 } | 996 } |
968 if(U_SUCCESS(*err)) { | 997 if(U_SUCCESS(*err)) { |
969 _this->toULength = 0; | 998 _this->toULength = 0; |
970 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { | 999 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { |
971 if(_this->toULength>1) { | 1000 if(_this->toULength>1) { |
(...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1372 * KSC5601 : alias to ibm-949 mapping table | 1401 * KSC5601 : alias to ibm-949 mapping table |
1373 * GB2312 : alias to ibm-1386 mapping table | 1402 * GB2312 : alias to ibm-1386 mapping table |
1374 * ISO-8859-1 : Algorithmically implemented as LATIN1 case | 1403 * ISO-8859-1 : Algorithmically implemented as LATIN1 case |
1375 * ISO-8859-7 : alias to ibm-9409 mapping table | 1404 * ISO-8859-7 : alias to ibm-9409 mapping table |
1376 */ | 1405 */ |
1377 | 1406 |
1378 /* preference order of JP charsets */ | 1407 /* preference order of JP charsets */ |
1379 static const StateEnum jpCharsetPref[]={ | 1408 static const StateEnum jpCharsetPref[]={ |
1380 ASCII, | 1409 ASCII, |
1381 JISX201, | 1410 JISX201, |
| 1411 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1382 ISO8859_1, | 1412 ISO8859_1, |
1383 ISO8859_7, | 1413 ISO8859_7, |
| 1414 #endif |
1384 JISX208, | 1415 JISX208, |
| 1416 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1385 JISX212, | 1417 JISX212, |
1386 GB2312, | 1418 GB2312, |
1387 KSC5601, | 1419 KSC5601, |
| 1420 #endif |
1388 HWKANA_7BIT | 1421 HWKANA_7BIT |
1389 }; | 1422 }; |
1390 | 1423 |
1391 /* | 1424 /* |
1392 * The escape sequences must be in order of the enum constants like JISX201 = 3
, | 1425 * The escape sequences must be in order of the enum constants like JISX201 = 3
, |
1393 * not in order of jpCharsetPref[]! | 1426 * not in order of jpCharsetPref[]! |
1394 */ | 1427 */ |
1395 static const char escSeqChars[][6] ={ | 1428 static const char escSeqChars[][6] ={ |
1396 "\x1B\x28\x42", /* <ESC>(B ASCII */ | 1429 "\x1B\x28\x42", /* <ESC>(B ASCII */ |
1397 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ | 1430 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ |
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1747 int8_t cs0 = choices[i]; | 1780 int8_t cs0 = choices[i]; |
1748 switch(cs0) { | 1781 switch(cs0) { |
1749 case ASCII: | 1782 case ASCII: |
1750 if(sourceChar <= 0x7f) { | 1783 if(sourceChar <= 0x7f) { |
1751 targetValue = (uint32_t)sourceChar; | 1784 targetValue = (uint32_t)sourceChar; |
1752 len = 1; | 1785 len = 1; |
1753 cs = cs0; | 1786 cs = cs0; |
1754 g = 0; | 1787 g = 0; |
1755 } | 1788 } |
1756 break; | 1789 break; |
| 1790 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1757 case ISO8859_1: | 1791 case ISO8859_1: |
1758 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { | 1792 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
1759 targetValue = (uint32_t)sourceChar - 0x80; | 1793 targetValue = (uint32_t)sourceChar - 0x80; |
1760 len = 1; | 1794 len = 1; |
1761 cs = cs0; | 1795 cs = cs0; |
1762 g = 2; | 1796 g = 2; |
1763 } | 1797 } |
1764 break; | 1798 break; |
| 1799 #endif |
1765 case HWKANA_7BIT: | 1800 case HWKANA_7BIT: |
1766 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { | 1801 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { |
1767 if(converterData->version==3) { | 1802 if(converterData->version==3) { |
1768 /* JIS7: use G1 (SO) */ | 1803 /* JIS7: use G1 (SO) */ |
1769 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ | 1804 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ |
1770 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); | 1805 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); |
1771 len = 1; | 1806 len = 1; |
1772 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ | 1807 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ |
1773 g = 1; | 1808 g = 1; |
1774 } else if(converterData->version==4) { | 1809 } else if(converterData->version==4) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1816 } | 1851 } |
1817 } else if(len == 0 && useFallback && | 1852 } else if(len == 0 && useFallback && |
1818 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { | 1853 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { |
1819 targetValue = hwkana_fb[sourceChar - HWKANA_START]; | 1854 targetValue = hwkana_fb[sourceChar - HWKANA_START]; |
1820 len = -2; | 1855 len = -2; |
1821 cs = cs0; | 1856 cs = cs0; |
1822 g = 0; | 1857 g = 0; |
1823 useFallback = FALSE; | 1858 useFallback = FALSE; |
1824 } | 1859 } |
1825 break; | 1860 break; |
| 1861 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1826 case ISO8859_7: | 1862 case ISO8859_7: |
1827 /* G0 SBCS forced to 7-bit output */ | 1863 /* G0 SBCS forced to 7-bit output */ |
1828 len2 = MBCS_SINGLE_FROM_UCHAR32( | 1864 len2 = MBCS_SINGLE_FROM_UCHAR32( |
1829 converterData->myConverterArray[cs0], | 1865 converterData->myConverterArray[cs0], |
1830 sourceChar, &value, | 1866 sourceChar, &value, |
1831 useFallback); | 1867 useFallback); |
1832 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { | 1868 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { |
1833 targetValue = value - 0x80; | 1869 targetValue = value - 0x80; |
1834 len = len2; | 1870 len = len2; |
1835 cs = cs0; | 1871 cs = cs0; |
1836 g = 2; | 1872 g = 2; |
1837 useFallback = FALSE; | 1873 useFallback = FALSE; |
1838 } | 1874 } |
1839 break; | 1875 break; |
| 1876 #endif |
1840 default: | 1877 default: |
1841 /* G0 DBCS */ | 1878 /* G0 DBCS */ |
1842 len2 = MBCS_FROM_UCHAR32_ISO2022( | 1879 len2 = MBCS_FROM_UCHAR32_ISO2022( |
1843 converterData->myConverterArray[cs0], | 1880 converterData->myConverterArray[cs0], |
1844 sourceChar, &value, | 1881 sourceChar, &value, |
1845 useFallback, MBCS_OUTPUT_2); | 1882 useFallback, MBCS_OUTPUT_2); |
1846 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ | 1883 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ |
| 1884 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1847 if(cs0 == KSC5601) { | 1885 if(cs0 == KSC5601) { |
1848 /* | 1886 /* |
1849 * Check for valid bytes for the encoding scheme. | 1887 * Check for valid bytes for the encoding scheme. |
1850 * This is necessary because the sub-converter (wind
ows-949) | 1888 * This is necessary because the sub-converter (wind
ows-949) |
1851 * has a broader encoding scheme than is valid for 2
022. | 1889 * has a broader encoding scheme than is valid for 2
022. |
1852 */ | 1890 */ |
1853 value = _2022FromGR94DBCS(value); | 1891 value = _2022FromGR94DBCS(value); |
1854 if(value == 0) { | 1892 if(value == 0) { |
1855 break; | 1893 break; |
1856 } | 1894 } |
1857 } | 1895 } |
| 1896 #endif |
1858 targetValue = value; | 1897 targetValue = value; |
1859 len = len2; | 1898 len = len2; |
1860 cs = cs0; | 1899 cs = cs0; |
1861 g = 0; | 1900 g = 0; |
1862 useFallback = FALSE; | 1901 useFallback = FALSE; |
1863 } | 1902 } |
1864 break; | 1903 break; |
1865 } | 1904 } |
1866 } | 1905 } |
1867 | 1906 |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2141 /* return from a single-shift state to the previous one */ | 2180 /* return from a single-shift state to the previous one */ |
2142 if(pToU2022State->g >= 2) { | 2181 if(pToU2022State->g >= 2) { |
2143 pToU2022State->g=pToU2022State->prevG; | 2182 pToU2022State->g=pToU2022State->prevG; |
2144 } | 2183 } |
2145 } else switch(cs) { | 2184 } else switch(cs) { |
2146 case ASCII: | 2185 case ASCII: |
2147 if(mySourceChar <= 0x7f) { | 2186 if(mySourceChar <= 0x7f) { |
2148 targetUniChar = mySourceChar; | 2187 targetUniChar = mySourceChar; |
2149 } | 2188 } |
2150 break; | 2189 break; |
| 2190 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
2151 case ISO8859_1: | 2191 case ISO8859_1: |
2152 if(mySourceChar <= 0x7f) { | 2192 if(mySourceChar <= 0x7f) { |
2153 targetUniChar = mySourceChar + 0x80; | 2193 targetUniChar = mySourceChar + 0x80; |
2154 } | 2194 } |
2155 /* return from a single-shift state to the previous one */ | 2195 /* return from a single-shift state to the previous one */ |
2156 pToU2022State->g=pToU2022State->prevG; | 2196 pToU2022State->g=pToU2022State->prevG; |
2157 break; | 2197 break; |
2158 case ISO8859_7: | 2198 case ISO8859_7: |
2159 if(mySourceChar <= 0x7f) { | 2199 if(mySourceChar <= 0x7f) { |
2160 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ | 2200 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ |
2161 targetUniChar = | 2201 targetUniChar = |
2162 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( | 2202 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( |
2163 myData->myConverterArray[cs], | 2203 myData->myConverterArray[cs], |
2164 mySourceChar + 0x80); | 2204 mySourceChar + 0x80); |
2165 } | 2205 } |
2166 /* return from a single-shift state to the previous one */ | 2206 /* return from a single-shift state to the previous one */ |
2167 pToU2022State->g=pToU2022State->prevG; | 2207 pToU2022State->g=pToU2022State->prevG; |
2168 break; | 2208 break; |
| 2209 #endif |
2169 case JISX201: | 2210 case JISX201: |
2170 if(mySourceChar <= 0x7f) { | 2211 if(mySourceChar <= 0x7f) { |
2171 targetUniChar = jisx201ToU(mySourceChar); | 2212 targetUniChar = jisx201ToU(mySourceChar); |
2172 } | 2213 } |
2173 break; | 2214 break; |
2174 case HWKANA_7BIT: | 2215 case HWKANA_7BIT: |
2175 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { | 2216 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { |
2176 /* 7-bit halfwidth Katakana */ | 2217 /* 7-bit halfwidth Katakana */ |
2177 targetUniChar = mySourceChar + (HWKANA_START - 0x21); | 2218 targetUniChar = mySourceChar + (HWKANA_START - 0x21); |
2178 } | 2219 } |
(...skipping 19 matching lines...) Expand all Loading... |
2198 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; | 2239 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; |
2199 if (leadIsOk && trailIsOk) { | 2240 if (leadIsOk && trailIsOk) { |
2200 ++mySource; | 2241 ++mySource; |
2201 tmpSourceChar = (mySourceChar << 8) | trailByte; | 2242 tmpSourceChar = (mySourceChar << 8) | trailByte; |
2202 if(cs == JISX208) { | 2243 if(cs == JISX208) { |
2203 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); | 2244 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); |
2204 mySourceChar = tmpSourceChar; | 2245 mySourceChar = tmpSourceChar; |
2205 } else { | 2246 } else { |
2206 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ | 2247 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ |
2207 mySourceChar = tmpSourceChar; | 2248 mySourceChar = tmpSourceChar; |
| 2249 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
2208 if (cs == KSC5601) { | 2250 if (cs == KSC5601) { |
2209 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ | 2251 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ |
2210 } | 2252 } |
| 2253 #endif |
2211 tempBuf[0] = (char)(tmpSourceChar >> 8); | 2254 tempBuf[0] = (char)(tmpSourceChar >> 8); |
2212 tempBuf[1] = (char)(tmpSourceChar); | 2255 tempBuf[1] = (char)(tmpSourceChar); |
2213 } | 2256 } |
2214 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); | 2257 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); |
2215 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { | 2258 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { |
2216 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ | 2259 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ |
2217 ++mySource; | 2260 ++mySource; |
2218 /* add another bit so that the code below writes 2 b
ytes in case of error */ | 2261 /* add another bit so that the code below writes 2 b
ytes in case of error */ |
2219 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; | 2262 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; |
2220 } | 2263 } |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2262 *err =U_BUFFER_OVERFLOW_ERROR; | 2305 *err =U_BUFFER_OVERFLOW_ERROR; |
2263 break; | 2306 break; |
2264 } | 2307 } |
2265 } | 2308 } |
2266 endloop: | 2309 endloop: |
2267 args->target = myTarget; | 2310 args->target = myTarget; |
2268 args->source = mySource; | 2311 args->source = mySource; |
2269 } | 2312 } |
2270 | 2313 |
2271 | 2314 |
| 2315 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
2272 /*************************************************************** | 2316 /*************************************************************** |
2273 * Rules for ISO-2022-KR encoding | 2317 * Rules for ISO-2022-KR encoding |
2274 * i) The KSC5601 designator sequence should appear only once in a file, | 2318 * i) The KSC5601 designator sequence should appear only once in a file, |
2275 * at the beginning of a line before any KSC5601 characters. This usually | 2319 * at the beginning of a line before any KSC5601 characters. This usually |
2276 * means that it appears by itself on the first line of the file | 2320 * means that it appears by itself on the first line of the file |
2277 * ii) There are only 2 shifting sequences SO to shift into double byte mode | 2321 * ii) There are only 2 shifting sequences SO to shift into double byte mode |
2278 * and SI to shift into single byte mode | 2322 * and SI to shift into single byte mode |
2279 */ | 2323 */ |
2280 static void | 2324 static void |
2281 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ | 2325 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ |
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3405 } | 3449 } |
3406 else{ | 3450 else{ |
3407 *err =U_BUFFER_OVERFLOW_ERROR; | 3451 *err =U_BUFFER_OVERFLOW_ERROR; |
3408 break; | 3452 break; |
3409 } | 3453 } |
3410 } | 3454 } |
3411 endloop: | 3455 endloop: |
3412 args->target = myTarget; | 3456 args->target = myTarget; |
3413 args->source = mySource; | 3457 args->source = mySource; |
3414 } | 3458 } |
| 3459 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
3415 | 3460 |
3416 static void | 3461 static void |
3417 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { | 3462 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { |
3418 UConverter *cnv = args->converter; | 3463 UConverter *cnv = args->converter; |
3419 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; | 3464 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; |
3420 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; | 3465 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; |
3421 char *p, *subchar; | 3466 char *p, *subchar; |
3422 char buffer[8]; | 3467 char buffer[8]; |
3423 int32_t length; | 3468 int32_t length; |
3424 | 3469 |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3606 #endif | 3651 #endif |
3607 | 3652 |
3608 cnvData = (UConverterDataISO2022*)cnv->extraInfo; | 3653 cnvData = (UConverterDataISO2022*)cnv->extraInfo; |
3609 | 3654 |
3610 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ | 3655 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ |
3611 switch(cnvData->locale[0]){ | 3656 switch(cnvData->locale[0]){ |
3612 case 'j': | 3657 case 'j': |
3613 /* include JIS X 0201 which is hardcoded */ | 3658 /* include JIS X 0201 which is hardcoded */ |
3614 sa->add(sa->set, 0xa5); | 3659 sa->add(sa->set, 0xa5); |
3615 sa->add(sa->set, 0x203e); | 3660 sa->add(sa->set, 0x203e); |
| 3661 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
3616 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | 3662 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
3617 /* include Latin-1 for some variants of JP */ | 3663 /* include Latin-1 for some variants of JP */ |
3618 sa->addRange(sa->set, 0, 0xff); | 3664 sa->addRange(sa->set, 0, 0xff); |
3619 } else { | 3665 } else { |
3620 /* include ASCII for JP */ | 3666 /* include ASCII for JP */ |
3621 sa->addRange(sa->set, 0, 0x7f); | 3667 sa->addRange(sa->set, 0, 0x7f); |
3622 } | 3668 } |
| 3669 #else |
| 3670 /* include ASCII for JP */ |
| 3671 sa->addRange(sa->set, 0, 0x7f); |
| 3672 #endif |
3623 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { | 3673 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { |
3624 /* | 3674 /* |
3625 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 | 3675 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 |
3626 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) | 3676 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) |
3627 * use half-width Katakana. | 3677 * use half-width Katakana. |
3628 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) | 3678 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) |
3629 * half-width Katakana via the ESC ( I sequence. | 3679 * half-width Katakana via the ESC ( I sequence. |
3630 * However, we only emit (fromUnicode) half-width Katakana according
to the | 3680 * However, we only emit (fromUnicode) half-width Katakana according
to the |
3631 * definition of each variant. | 3681 * definition of each variant. |
3632 * | 3682 * |
3633 * When including fallbacks, | 3683 * When including fallbacks, |
3634 * we need to include half-width Katakana Unicode code points for al
l JP variants because | 3684 * we need to include half-width Katakana Unicode code points for al
l JP variants because |
3635 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). | 3685 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). |
3636 */ | 3686 */ |
3637 /* include half-width Katakana for JP */ | 3687 /* include half-width Katakana for JP */ |
3638 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | 3688 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
3639 } | 3689 } |
3640 break; | 3690 break; |
| 3691 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
3641 case 'c': | 3692 case 'c': |
3642 case 'z': | 3693 case 'z': |
3643 /* include ASCII for CN */ | 3694 /* include ASCII for CN */ |
3644 sa->addRange(sa->set, 0, 0x7f); | 3695 sa->addRange(sa->set, 0, 0x7f); |
3645 break; | 3696 break; |
3646 case 'k': | 3697 case 'k': |
3647 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ | 3698 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ |
3648 cnvData->currentConverter->sharedData->impl->getUnicodeSet( | 3699 cnvData->currentConverter->sharedData->impl->getUnicodeSet( |
3649 cnvData->currentConverter, sa, which, pErrorCode); | 3700 cnvData->currentConverter, sa, which, pErrorCode); |
3650 /* the loop over myConverterArray[] will simply not find another convert
er */ | 3701 /* the loop over myConverterArray[] will simply not find another convert
er */ |
3651 break; | 3702 break; |
| 3703 #endif |
3652 default: | 3704 default: |
3653 break; | 3705 break; |
3654 } | 3706 } |
3655 | 3707 |
3656 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem
ent ucnv_getUnicodeSet() with reverse fallbacks. */ | 3708 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem
ent ucnv_getUnicodeSet() with reverse fallbacks. */ |
3657 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 3709 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
3658 cnvData->version==0 && i==CNS_11643 | 3710 cnvData->version==0 && i==CNS_11643 |
3659 ) { | 3711 ) { |
3660 /* special handling for non-EXT ISO-2022-CN: add only code point
s for CNS planes 1 and 2 */ | 3712 /* special handling for non-EXT ISO-2022-CN: add only code point
s for CNS planes 1 and 2 */ |
3661 ucnv_MBCSGetUnicodeSetForBytes( | 3713 ucnv_MBCSGetUnicodeSetForBytes( |
3662 cnvData->myConverterArray[i], | 3714 cnvData->myConverterArray[i], |
3663 sa, UCNV_ROUNDTRIP_SET, | 3715 sa, UCNV_ROUNDTRIP_SET, |
3664 0, 0x81, 0x82, | 3716 0, 0x81, 0x82, |
3665 pErrorCode); | 3717 pErrorCode); |
3666 } | 3718 } |
3667 #endif | 3719 #endif |
3668 | 3720 |
3669 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { | 3721 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
3670 UConverterSetFilter filter; | 3722 UConverterSetFilter filter; |
3671 if(cnvData->myConverterArray[i]!=NULL) { | 3723 if(cnvData->myConverterArray[i]!=NULL) { |
3672 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 3724 if(cnvData->locale[0]=='j' && i==JISX208) { |
3673 cnvData->version==0 && i==CNS_11643 | 3725 /* |
3674 ) { | 3726 * Only add code points that map to Shift-JIS codes |
| 3727 * corresponding to JIS X 0208. |
| 3728 */ |
| 3729 filter=UCNV_SET_FILTER_SJIS; |
| 3730 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 3731 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
| 3732 cnvData->version==0 && i==CNS_11643) { |
3675 /* | 3733 /* |
3676 * Version-specific for CN: | 3734 * Version-specific for CN: |
3677 * CN version 0 does not map CNS planes 3..7 although | 3735 * CN version 0 does not map CNS planes 3..7 although |
3678 * they are all available in the CNS conversion table; | 3736 * they are all available in the CNS conversion table; |
3679 * CN version 1 (-EXT) does map them all. | 3737 * CN version 1 (-EXT) does map them all. |
3680 * The two versions create different Unicode sets. | 3738 * The two versions create different Unicode sets. |
3681 */ | 3739 */ |
3682 filter=UCNV_SET_FILTER_2022_CN; | 3740 filter=UCNV_SET_FILTER_2022_CN; |
3683 } else if(cnvData->locale[0]=='j' && i==JISX208) { | |
3684 /* | |
3685 * Only add code points that map to Shift-JIS codes | |
3686 * corresponding to JIS X 0208. | |
3687 */ | |
3688 filter=UCNV_SET_FILTER_SJIS; | |
3689 } else if(i==KSC5601) { | 3741 } else if(i==KSC5601) { |
3690 /* | 3742 /* |
3691 * Some of the KSC 5601 tables (convrtrs.txt has this alias on
multiple tables) | 3743 * Some of the KSC 5601 tables (convrtrs.txt has this alias on
multiple tables) |
3692 * are broader than GR94. | 3744 * are broader than GR94. |
3693 */ | 3745 */ |
3694 filter=UCNV_SET_FILTER_GR94DBCS; | 3746 filter=UCNV_SET_FILTER_GR94DBCS; |
| 3747 #endif |
3695 } else { | 3748 } else { |
3696 filter=UCNV_SET_FILTER_NONE; | 3749 filter=UCNV_SET_FILTER_NONE; |
3697 } | 3750 } |
3698 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); | 3751 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); |
3699 } | 3752 } |
3700 } | 3753 } |
3701 | 3754 |
3702 /* | 3755 /* |
3703 * ISO 2022 converters must not convert SO/SI/ESC despite what | 3756 * ISO 2022 converters must not convert SO/SI/ESC despite what |
3704 * sub-converters do by themselves. | 3757 * sub-converters do by themselves. |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3822 NULL, | 3875 NULL, |
3823 NULL, | 3876 NULL, |
3824 &_ISO2022JPStaticData, | 3877 &_ISO2022JPStaticData, |
3825 FALSE, | 3878 FALSE, |
3826 &_ISO2022JPImpl, | 3879 &_ISO2022JPImpl, |
3827 0, UCNV_MBCS_TABLE_INITIALIZER | 3880 0, UCNV_MBCS_TABLE_INITIALIZER |
3828 }; | 3881 }; |
3829 | 3882 |
3830 } // namespace | 3883 } // namespace |
3831 | 3884 |
| 3885 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
3832 /************* KR ***************/ | 3886 /************* KR ***************/ |
3833 static const UConverterImpl _ISO2022KRImpl={ | 3887 static const UConverterImpl _ISO2022KRImpl={ |
3834 UCNV_ISO_2022, | 3888 UCNV_ISO_2022, |
3835 | 3889 |
3836 NULL, | 3890 NULL, |
3837 NULL, | 3891 NULL, |
3838 | 3892 |
3839 _ISO2022Open, | 3893 _ISO2022Open, |
3840 _ISO2022Close, | 3894 _ISO2022Close, |
3841 _ISO2022Reset, | 3895 _ISO2022Reset, |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3938 ~((uint32_t) 0), | 3992 ~((uint32_t) 0), |
3939 NULL, | 3993 NULL, |
3940 NULL, | 3994 NULL, |
3941 &_ISO2022CNStaticData, | 3995 &_ISO2022CNStaticData, |
3942 FALSE, | 3996 FALSE, |
3943 &_ISO2022CNImpl, | 3997 &_ISO2022CNImpl, |
3944 0, UCNV_MBCS_TABLE_INITIALIZER | 3998 0, UCNV_MBCS_TABLE_INITIALIZER |
3945 }; | 3999 }; |
3946 | 4000 |
3947 } // namespace | 4001 } // namespace |
| 4002 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
3948 | 4003 |
3949 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 4004 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
OLD | NEW |