Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(184)

Side by Side Diff: source/common/ucnv2022.cpp

Issue 839713003: ICU update to 54 step 3 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: fix big5 mapping Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « scripts/sjis_gen.sh ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2000-2014, International Business Machines 3 * Copyright (C) 2000-2014, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 * file name: ucnv2022.cpp 6 * file name: ucnv2022.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 CNS_11643_1, 145 CNS_11643_1,
146 CNS_11643_2, 146 CNS_11643_2,
147 CNS_11643_3, 147 CNS_11643_3,
148 CNS_11643_4, 148 CNS_11643_4,
149 CNS_11643_5, 149 CNS_11643_5,
150 CNS_11643_6, 150 CNS_11643_6,
151 CNS_11643_7 151 CNS_11643_7
152 } StateEnum; 152 } StateEnum;
153 153
154 /* is the StateEnum charset value for a DBCS charset? */ 154 /* is the StateEnum charset value for a DBCS charset? */
155 #if UCONFIG_NO_NON_HTML5_CONVERSION
156 #define IS_JP_DBCS(cs) (JISX208==(cs))
157 #else
155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 158 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
159 #endif
156 160
157 #define CSM(cs) ((uint16_t)1<<(cs)) 161 #define CSM(cs) ((uint16_t)1<<(cs))
158 162
159 /* 163 /*
160 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence 164 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
161 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x 165 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
162 * 166 *
163 * Note: The converter uses some leniency: 167 * Note: The converter uses some leniency:
164 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in 168 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
165 * all versions, not just JIS7 and JIS8. 169 * all versions, not just JIS7 and JIS8.
166 * - ICU does not distinguish between different versions of JIS X 0208. 170 * - ICU does not distinguish between different versions of JIS X 0208.
167 */ 171 */
172 #if UCONFIG_NO_NON_HTML5_CONVERSION
173 enum { MAX_JA_VERSION=0 };
174 #else
168 enum { MAX_JA_VERSION=4 }; 175 enum { MAX_JA_VERSION=4 };
176 #endif
169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 177 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
178 /*
179 * TODO(jshin): The encoding spec has JISX212, but we don't support it.
180 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
181 */
170 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
183 #if !UCONFIG_NO_NON_HTML5_CONVERSION
171 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
172 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
173 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
174 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
188 #endif
175 }; 189 };
176 190
177 typedef enum { 191 typedef enum {
178 ASCII1=0, 192 ASCII1=0,
179 LATIN1, 193 LATIN1,
180 SBCS, 194 SBCS,
181 DBCS, 195 DBCS,
182 MBCS, 196 MBCS,
183 HWKANA 197 HWKANA
184 }Cnv2022Type; 198 }Cnv2022Type;
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after
351 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_ 2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 365 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_ 2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
352 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 366 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
353 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T ERMINAL_2022 ,VALID_TERMINAL_2022 367 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T ERMINAL_2022 ,VALID_TERMINAL_2022
354 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 368 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
355 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 369 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
356 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
357 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
358 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
359 }; 373 };
360 374
361
362 /* Type def for refactoring changeState_2022 code*/ 375 /* Type def for refactoring changeState_2022 code*/
363 typedef enum{ 376 typedef enum{
364 #ifdef U_ENABLE_GENERIC_ISO_2022 377 #ifdef U_ENABLE_GENERIC_ISO_2022
365 ISO_2022=0, 378 ISO_2022=0,
366 #endif 379 #endif
380 #if UCONFIG_NO_NON_HTML5_CONVERSION
381 ISO_2022_JP=1
382 #else
367 ISO_2022_JP=1, 383 ISO_2022_JP=1,
368 ISO_2022_KR=2, 384 ISO_2022_KR=2,
369 ISO_2022_CN=3 385 ISO_2022_CN=3
386 #endif
370 } Variant2022; 387 } Variant2022;
371 388
372 /*********** ISO 2022 Converter Protos ***********/ 389 /*********** ISO 2022 Converter Protos ***********/
373 static void 390 static void
374 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 391 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
375 392
376 static void 393 static void
377 _ISO2022Close(UConverter *converter); 394 _ISO2022Close(UConverter *converter);
378 395
379 static void 396 static void
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 myConverterData->version = version; 493 myConverterData->version = version;
477 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && 494 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
478 (myLocale[2]=='_' || myLocale[2]=='\0')) 495 (myLocale[2]=='_' || myLocale[2]=='\0'))
479 { 496 {
480 size_t len=0; 497 size_t len=0;
481 /* open the required converters and cache them */ 498 /* open the required converters and cache them */
482 if(version>MAX_JA_VERSION) { 499 if(version>MAX_JA_VERSION) {
483 /* prevent indexing beyond jpCharsetMasks[] */ 500 /* prevent indexing beyond jpCharsetMasks[] */
484 myConverterData->version = version = 0; 501 myConverterData->version = version = 0;
485 } 502 }
503 #if !UCONFIG_NO_NON_HTML5_CONVERSION
486 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 504 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
487 myConverterData->myConverterArray[ISO8859_7] = 505 myConverterData->myConverterArray[ISO8859_7] =
488 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode); 506 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode);
489 } 507 }
508 #endif
490 myConverterData->myConverterArray[JISX208] = 509 myConverterData->myConverterArray[JISX208] =
491 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code); 510 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code);
511 #if !UCONFIG_NO_NON_HTML5_CONVERSION
492 if(jpCharsetMasks[version]&CSM(JISX212)) { 512 if(jpCharsetMasks[version]&CSM(JISX212)) {
493 myConverterData->myConverterArray[JISX212] = 513 myConverterData->myConverterArray[JISX212] =
494 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode); 514 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode);
495 } 515 }
496 if(jpCharsetMasks[version]&CSM(GB2312)) { 516 if(jpCharsetMasks[version]&CSM(GB2312)) {
497 myConverterData->myConverterArray[GB2312] = 517 myConverterData->myConverterArray[GB2312] =
498 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */ 518 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */
499 } 519 }
500 if(jpCharsetMasks[version]&CSM(KSC5601)) { 520 if(jpCharsetMasks[version]&CSM(KSC5601)) {
501 myConverterData->myConverterArray[KSC5601] = 521 myConverterData->myConverterArray[KSC5601] =
502 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode); 522 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode);
503 } 523 }
524 #endif
504 525
505 /* set the function pointers to appropriate functions */ 526 /* set the function pointers to appropriate functions */
506 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 527 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
507 uprv_strcpy(myConverterData->locale,"ja"); 528 uprv_strcpy(myConverterData->locale,"ja");
508 529
509 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= "); 530 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= ");
510 len = uprv_strlen(myConverterData->name); 531 len = uprv_strlen(myConverterData->name);
511 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ; 532 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ;
512 myConverterData->name[len+1]='\0'; 533 myConverterData->name[len+1]='\0';
513 } 534 }
535 #if !UCONFIG_NO_NON_HTML5_CONVERSION
514 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 536 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
515 (myLocale[2]=='_' || myLocale[2]=='\0')) 537 (myLocale[2]=='_' || myLocale[2]=='\0'))
516 { 538 {
517 const char *cnvName; 539 const char *cnvName;
518 if(version==1) { 540 if(version==1) {
519 cnvName="icu-internal-25546"; 541 cnvName="icu-internal-25546";
520 } else { 542 } else {
521 cnvName="ibm-949"; 543 cnvName="ibm-949";
522 myConverterData->version=version=0; 544 myConverterData->version=version=0;
523 } 545 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
573 myConverterData->version = 0; 595 myConverterData->version = 0;
574 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0"); 596 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0");
575 }else if (version==1){ 597 }else if (version==1){
576 myConverterData->version = 1; 598 myConverterData->version = 1;
577 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1"); 599 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1");
578 }else { 600 }else {
579 myConverterData->version = 2; 601 myConverterData->version = 2;
580 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2"); 602 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2");
581 } 603 }
582 } 604 }
605 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION
583 else{ 606 else{
584 #ifdef U_ENABLE_GENERIC_ISO_2022 607 #ifdef U_ENABLE_GENERIC_ISO_2022
585 myConverterData->isFirstBuffer = TRUE; 608 myConverterData->isFirstBuffer = TRUE;
586 609
587 /* append the UTF-8 escape sequence */ 610 /* append the UTF-8 escape sequence */
588 cnv->charErrorBufferLength = 3; 611 cnv->charErrorBufferLength = 3;
589 cnv->charErrorBuffer[0] = 0x1b; 612 cnv->charErrorBuffer[0] = 0x1b;
590 cnv->charErrorBuffer[1] = 0x25; 613 cnv->charErrorBuffer[1] = 0x25;
591 cnv->charErrorBuffer[2] = 0x42; 614 cnv->charErrorBuffer[2] = 0x42;
592 615
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
707 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 730 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
708 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE 731 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE
709 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 732 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
710 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE 733 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE
711 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 734 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
712 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 735 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
713 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
714 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
715 }; 738 };
716 739
740 #if !UCONFIG_NO_NON_HTML5_CONVERSION
717 /*************** to unicode *******************/ 741 /*************** to unicode *******************/
718 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 742 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
719 /* 0 1 2 3 4 5 6 7 8 9 */ 743 /* 0 1 2 3 4 5 6 7 8 9 */
720 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 744 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
721 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 745 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
722 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 746 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
723 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
724 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
725 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 749 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
726 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
727 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 751 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
728 }; 752 };
753 #endif
729 754
730 755
731 static UCNV_TableStates_2022 756 static UCNV_TableStates_2022
732 getKey_2022(char c,int32_t* key,int32_t* offset){ 757 getKey_2022(char c,int32_t* key,int32_t* offset){
733 int32_t togo; 758 int32_t togo;
734 int32_t low = 0; 759 int32_t low = 0;
735 int32_t hi = MAX_STATES_2022; 760 int32_t hi = MAX_STATES_2022;
736 int32_t oldmid=0; 761 int32_t oldmid=0;
737 762
738 togo = normalize_esq_chars_2022[(uint8_t)c]; 763 togo = normalize_esq_chars_2022[(uint8_t)c];
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
871 if(myData2022->toU2022State.g<2) { 896 if(myData2022->toU2022State.g<2) {
872 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g; 897 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;
873 } 898 }
874 myData2022->toU2022State.g=2; 899 myData2022->toU2022State.g=2;
875 } else { 900 } else {
876 /* illegal to have SS2 before a matching designator */ 901 /* illegal to have SS2 before a matching designator */
877 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 902 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
878 } 903 }
879 break; 904 break;
880 /* case SS3_STATE: not used in ISO-2022-JP-x */ 905 /* case SS3_STATE: not used in ISO-2022-JP-x */
906 #if !UCONFIG_NO_NON_HTML5_CONVERSION
881 case ISO8859_1: 907 case ISO8859_1:
882 case ISO8859_7: 908 case ISO8859_7:
883 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 909 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
884 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 910 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
885 } else { 911 } else {
886 /* G2 charset for SS2 */ 912 /* G2 charset for SS2 */
887 myData2022->toU2022State.cs[2]=(int8_t)tempState; 913 myData2022->toU2022State.cs[2]=(int8_t)tempState;
888 } 914 }
889 break; 915 break;
916 #endif
890 default: 917 default:
891 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 918 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
892 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 919 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
893 } else { 920 } else {
894 /* G0 charset */ 921 /* G0 charset */
895 myData2022->toU2022State.cs[0]=(int8_t)tempState; 922 myData2022->toU2022State.cs[0]=(int8_t)tempState;
896 } 923 }
897 break; 924 break;
898 } 925 }
899 } 926 }
900 break; 927 break;
928 #if !UCONFIG_NO_NON_HTML5_CONVERSION
901 case ISO_2022_CN: 929 case ISO_2022_CN:
902 { 930 {
903 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 931 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
904 switch(tempState) { 932 switch(tempState) {
905 case INVALID_STATE: 933 case INVALID_STATE:
906 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 934 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
907 break; 935 break;
908 case SS2_STATE: 936 case SS2_STATE:
909 if(myData2022->toU2022State.cs[2]!=0) { 937 if(myData2022->toU2022State.cs[2]!=0) {
910 if(myData2022->toU2022State.g<2) { 938 if(myData2022->toU2022State.g<2) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
952 } 980 }
953 } 981 }
954 break; 982 break;
955 case ISO_2022_KR: 983 case ISO_2022_KR:
956 if(offset==0x30){ 984 if(offset==0x30){
957 /* nothing to be done, just accept this one escape sequence */ 985 /* nothing to be done, just accept this one escape sequence */
958 } else { 986 } else {
959 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 987 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
960 } 988 }
961 break; 989 break;
990 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
962 991
963 default: 992 default:
964 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 993 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
965 break; 994 break;
966 } 995 }
967 } 996 }
968 if(U_SUCCESS(*err)) { 997 if(U_SUCCESS(*err)) {
969 _this->toULength = 0; 998 _this->toULength = 0;
970 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { 999 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
971 if(_this->toULength>1) { 1000 if(_this->toULength>1) {
(...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after
1372 * KSC5601 : alias to ibm-949 mapping table 1401 * KSC5601 : alias to ibm-949 mapping table
1373 * GB2312 : alias to ibm-1386 mapping table 1402 * GB2312 : alias to ibm-1386 mapping table
1374 * ISO-8859-1 : Algorithmic implemented as LATIN1 case 1403 * ISO-8859-1 : Algorithmic implemented as LATIN1 case
1375 * ISO-8859-7 : alias to ibm-9409 mapping table 1404 * ISO-8859-7 : alias to ibm-9409 mapping table
1376 */ 1405 */
1377 1406
1378 /* preference order of JP charsets */ 1407 /* preference order of JP charsets */
1379 static const StateEnum jpCharsetPref[]={ 1408 static const StateEnum jpCharsetPref[]={
1380 ASCII, 1409 ASCII,
1381 JISX201, 1410 JISX201,
1411 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1382 ISO8859_1, 1412 ISO8859_1,
1383 ISO8859_7, 1413 ISO8859_7,
1414 #endif
1384 JISX208, 1415 JISX208,
1416 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1385 JISX212, 1417 JISX212,
1386 GB2312, 1418 GB2312,
1387 KSC5601, 1419 KSC5601,
1420 #endif
1388 HWKANA_7BIT 1421 HWKANA_7BIT
1389 }; 1422 };
1390 1423
1391 /* 1424 /*
1392 * The escape sequences must be in order of the enum constants like JISX201 = 3, 1425 * The escape sequences must be in order of the enum constants like JISX201 = 3,
1393 * not in order of jpCharsetPref[]! 1426 * not in order of jpCharsetPref[]!
1394 */ 1427 */
1395 static const char escSeqChars[][6] ={ 1428 static const char escSeqChars[][6] ={
1396 "\x1B\x28\x42", /* <ESC>(B ASCII */ 1429 "\x1B\x28\x42", /* <ESC>(B ASCII */
1397 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ 1430 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after
1747 int8_t cs0 = choices[i]; 1780 int8_t cs0 = choices[i];
1748 switch(cs0) { 1781 switch(cs0) {
1749 case ASCII: 1782 case ASCII:
1750 if(sourceChar <= 0x7f) { 1783 if(sourceChar <= 0x7f) {
1751 targetValue = (uint32_t)sourceChar; 1784 targetValue = (uint32_t)sourceChar;
1752 len = 1; 1785 len = 1;
1753 cs = cs0; 1786 cs = cs0;
1754 g = 0; 1787 g = 0;
1755 } 1788 }
1756 break; 1789 break;
1790 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1757 case ISO8859_1: 1791 case ISO8859_1:
1758 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 1792 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1759 targetValue = (uint32_t)sourceChar - 0x80; 1793 targetValue = (uint32_t)sourceChar - 0x80;
1760 len = 1; 1794 len = 1;
1761 cs = cs0; 1795 cs = cs0;
1762 g = 2; 1796 g = 2;
1763 } 1797 }
1764 break; 1798 break;
1799 #endif
1765 case HWKANA_7BIT: 1800 case HWKANA_7BIT:
1766 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) { 1801 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) {
1767 if(converterData->version==3) { 1802 if(converterData->version==3) {
1768 /* JIS7: use G1 (SO) */ 1803 /* JIS7: use G1 (SO) */
1769 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ 1804 /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1770 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); 1805 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
1771 len = 1; 1806 len = 1;
1772 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ 1807 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
1773 g = 1; 1808 g = 1;
1774 } else if(converterData->version==4) { 1809 } else if(converterData->version==4) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1816 } 1851 }
1817 } else if(len == 0 && useFallback && 1852 } else if(len == 0 && useFallback &&
1818 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) { 1853 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) {
1819 targetValue = hwkana_fb[sourceChar - HWKANA_START]; 1854 targetValue = hwkana_fb[sourceChar - HWKANA_START];
1820 len = -2; 1855 len = -2;
1821 cs = cs0; 1856 cs = cs0;
1822 g = 0; 1857 g = 0;
1823 useFallback = FALSE; 1858 useFallback = FALSE;
1824 } 1859 }
1825 break; 1860 break;
1861 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1826 case ISO8859_7: 1862 case ISO8859_7:
1827 /* G0 SBCS forced to 7-bit output */ 1863 /* G0 SBCS forced to 7-bit output */
1828 len2 = MBCS_SINGLE_FROM_UCHAR32( 1864 len2 = MBCS_SINGLE_FROM_UCHAR32(
1829 converterData->myConverterArray[cs0], 1865 converterData->myConverterArray[cs0],
1830 sourceChar, &value, 1866 sourceChar, &value,
1831 useFallback); 1867 useFallback);
1832 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) { 1868 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) {
1833 targetValue = value - 0x80; 1869 targetValue = value - 0x80;
1834 len = len2; 1870 len = len2;
1835 cs = cs0; 1871 cs = cs0;
1836 g = 2; 1872 g = 2;
1837 useFallback = FALSE; 1873 useFallback = FALSE;
1838 } 1874 }
1839 break; 1875 break;
1876 #endif
1840 default: 1877 default:
1841 /* G0 DBCS */ 1878 /* G0 DBCS */
1842 len2 = MBCS_FROM_UCHAR32_ISO2022( 1879 len2 = MBCS_FROM_UCHAR32_ISO2022(
1843 converterData->myConverterArray[cs0], 1880 converterData->myConverterArray[cs0],
1844 sourceChar, &value, 1881 sourceChar, &value,
1845 useFallback, MBCS_OUTPUT_2); 1882 useFallback, MBCS_OUTPUT_2);
1846 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1883 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1884 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1847 if(cs0 == KSC5601) { 1885 if(cs0 == KSC5601) {
1848 /* 1886 /*
1849 * Check for valid bytes for the encoding scheme. 1887 * Check for valid bytes for the encoding scheme.
1850 * This is necessary because the sub-converter (windows-949) 1888 * This is necessary because the sub-converter (windows-949)
1851 * has a broader encoding scheme than is valid for 2022. 1889 * has a broader encoding scheme than is valid for 2022.
1852 */ 1890 */
1853 value = _2022FromGR94DBCS(value); 1891 value = _2022FromGR94DBCS(value);
1854 if(value == 0) { 1892 if(value == 0) {
1855 break; 1893 break;
1856 } 1894 }
1857 } 1895 }
1896 #endif
1858 targetValue = value; 1897 targetValue = value;
1859 len = len2; 1898 len = len2;
1860 cs = cs0; 1899 cs = cs0;
1861 g = 0; 1900 g = 0;
1862 useFallback = FALSE; 1901 useFallback = FALSE;
1863 } 1902 }
1864 break; 1903 break;
1865 } 1904 }
1866 } 1905 }
1867 1906
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
2141 /* return from a single-shift state to the previous one */ 2180 /* return from a single-shift state to the previous one */
2142 if(pToU2022State->g >= 2) { 2181 if(pToU2022State->g >= 2) {
2143 pToU2022State->g=pToU2022State->prevG; 2182 pToU2022State->g=pToU2022State->prevG;
2144 } 2183 }
2145 } else switch(cs) { 2184 } else switch(cs) {
2146 case ASCII: 2185 case ASCII:
2147 if(mySourceChar <= 0x7f) { 2186 if(mySourceChar <= 0x7f) {
2148 targetUniChar = mySourceChar; 2187 targetUniChar = mySourceChar;
2149 } 2188 }
2150 break; 2189 break;
2190 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2151 case ISO8859_1: 2191 case ISO8859_1:
2152 if(mySourceChar <= 0x7f) { 2192 if(mySourceChar <= 0x7f) {
2153 targetUniChar = mySourceChar + 0x80; 2193 targetUniChar = mySourceChar + 0x80;
2154 } 2194 }
2155 /* return from a single-shift state to the previous one */ 2195 /* return from a single-shift state to the previous one */
2156 pToU2022State->g=pToU2022State->prevG; 2196 pToU2022State->g=pToU2022State->prevG;
2157 break; 2197 break;
2158 case ISO8859_7: 2198 case ISO8859_7:
2159 if(mySourceChar <= 0x7f) { 2199 if(mySourceChar <= 0x7f) {
2160 /* convert mySourceChar+0x80 to use a normal 8-bit table */ 2200 /* convert mySourceChar+0x80 to use a normal 8-bit table */
2161 targetUniChar = 2201 targetUniChar =
2162 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( 2202 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
2163 myData->myConverterArray[cs], 2203 myData->myConverterArray[cs],
2164 mySourceChar + 0x80); 2204 mySourceChar + 0x80);
2165 } 2205 }
2166 /* return from a single-shift state to the previous one */ 2206 /* return from a single-shift state to the previous one */
2167 pToU2022State->g=pToU2022State->prevG; 2207 pToU2022State->g=pToU2022State->prevG;
2168 break; 2208 break;
2209 #endif
2169 case JISX201: 2210 case JISX201:
2170 if(mySourceChar <= 0x7f) { 2211 if(mySourceChar <= 0x7f) {
2171 targetUniChar = jisx201ToU(mySourceChar); 2212 targetUniChar = jisx201ToU(mySourceChar);
2172 } 2213 }
2173 break; 2214 break;
2174 case HWKANA_7BIT: 2215 case HWKANA_7BIT:
2175 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { 2216 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
2176 /* 7-bit halfwidth Katakana */ 2217 /* 7-bit halfwidth Katakana */
2177 targetUniChar = mySourceChar + (HWKANA_START - 0x21); 2218 targetUniChar = mySourceChar + (HWKANA_START - 0x21);
2178 } 2219 }
(...skipping 19 matching lines...) Expand all
2198 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ; 2239 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ;
2199 if (leadIsOk && trailIsOk) { 2240 if (leadIsOk && trailIsOk) {
2200 ++mySource; 2241 ++mySource;
2201 tmpSourceChar = (mySourceChar << 8) | trailByte; 2242 tmpSourceChar = (mySourceChar << 8) | trailByte;
2202 if(cs == JISX208) { 2243 if(cs == JISX208) {
2203 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf); 2244 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf);
2204 mySourceChar = tmpSourceChar; 2245 mySourceChar = tmpSourceChar;
2205 } else { 2246 } else {
2206 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ 2247 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
2207 mySourceChar = tmpSourceChar; 2248 mySourceChar = tmpSourceChar;
2249 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2208 if (cs == KSC5601) { 2250 if (cs == KSC5601) {
2209 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */ 2251 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */
2210 } 2252 }
2253 #endif
2211 tempBuf[0] = (char)(tmpSourceChar >> 8); 2254 tempBuf[0] = (char)(tmpSourceChar >> 8);
2212 tempBuf[1] = (char)(tmpSourceChar); 2255 tempBuf[1] = (char)(tmpSourceChar);
2213 } 2256 }
2214 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE); 2257 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE);
2215 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 2258 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
2216 /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 2259 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
2217 ++mySource; 2260 ++mySource;
2218 /* add another bit so that the code below writes 2 bytes in case of error */ 2261 /* add another bit so that the code below writes 2 bytes in case of error */
2219 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte; 2262 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte;
2220 } 2263 }
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2262 *err =U_BUFFER_OVERFLOW_ERROR; 2305 *err =U_BUFFER_OVERFLOW_ERROR;
2263 break; 2306 break;
2264 } 2307 }
2265 } 2308 }
2266 endloop: 2309 endloop:
2267 args->target = myTarget; 2310 args->target = myTarget;
2268 args->source = mySource; 2311 args->source = mySource;
2269 } 2312 }
2270 2313
2271 2314
2315 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2272 /*************************************************************** 2316 /***************************************************************
2273 * Rules for ISO-2022-KR encoding 2317 * Rules for ISO-2022-KR encoding
2274 * i) The KSC5601 designator sequence should appear only once in a file, 2318 * i) The KSC5601 designator sequence should appear only once in a file,
2275 * at the beginning of a line before any KSC5601 characters. This usually 2319 * at the beginning of a line before any KSC5601 characters. This usually
2276 * means that it appears by itself on the first line of the file 2320 * means that it appears by itself on the first line of the file
2277 * ii) There are only 2 shifting sequences SO to shift into double byte mode 2321 * ii) There are only 2 shifting sequences SO to shift into double byte mode
2278 * and SI to shift into single byte mode 2322 * and SI to shift into single byte mode
2279 */ 2323 */
2280 static void 2324 static void
2281 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2325 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after
3405 } 3449 }
3406 else{ 3450 else{
3407 *err =U_BUFFER_OVERFLOW_ERROR; 3451 *err =U_BUFFER_OVERFLOW_ERROR;
3408 break; 3452 break;
3409 } 3453 }
3410 } 3454 }
3411 endloop: 3455 endloop:
3412 args->target = myTarget; 3456 args->target = myTarget;
3413 args->source = mySource; 3457 args->source = mySource;
3414 } 3458 }
3459 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
3415 3460
3416 static void 3461 static void
3417 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) { 3462 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) {
3418 UConverter *cnv = args->converter; 3463 UConverter *cnv = args->converter;
3419 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo; 3464 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo;
3420 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; 3465 ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3421 char *p, *subchar; 3466 char *p, *subchar;
3422 char buffer[8]; 3467 char buffer[8];
3423 int32_t length; 3468 int32_t length;
3424 3469
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
3606 #endif 3651 #endif
3607 3652
3608 cnvData = (UConverterDataISO2022*)cnv->extraInfo; 3653 cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3609 3654
3610 /* open a set and initialize it with code points that are algorithmically round-tripped */ 3655 /* open a set and initialize it with code points that are algorithmically round-tripped */
3611 switch(cnvData->locale[0]){ 3656 switch(cnvData->locale[0]){
3612 case 'j': 3657 case 'j':
3613 /* include JIS X 0201 which is hardcoded */ 3658 /* include JIS X 0201 which is hardcoded */
3614 sa->add(sa->set, 0xa5); 3659 sa->add(sa->set, 0xa5);
3615 sa->add(sa->set, 0x203e); 3660 sa->add(sa->set, 0x203e);
3661 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3616 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3662 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3617 /* include Latin-1 for some variants of JP */ 3663 /* include Latin-1 for some variants of JP */
3618 sa->addRange(sa->set, 0, 0xff); 3664 sa->addRange(sa->set, 0, 0xff);
3619 } else { 3665 } else {
3620 /* include ASCII for JP */ 3666 /* include ASCII for JP */
3621 sa->addRange(sa->set, 0, 0x7f); 3667 sa->addRange(sa->set, 0, 0x7f);
3622 } 3668 }
3669 #else
3670 /* include ASCII for JP */
3671 sa->addRange(sa->set, 0, 0x7f);
3672 #endif
3623 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) { 3673 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) {
3624 /* 3674 /*
3625 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0 3675 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0
3626 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) 3676 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3627 * use half-width Katakana. 3677 * use half-width Katakana.
3628 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) 3678 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3629 * half-width Katakana via the ESC ( I sequence. 3679 * half-width Katakana via the ESC ( I sequence.
3630 * However, we only emit (fromUnicode) half-width Katakana according to the 3680 * However, we only emit (fromUnicode) half-width Katakana according to the
3631 * definition of each variant. 3681 * definition of each variant.
3632 * 3682 *
3633 * When including fallbacks, 3683 * When including fallbacks,
3634 * we need to include half-width Katakana Unicode code points for all JP variants because 3684 * we need to include half-width Katakana Unicode code points for all JP variants because
3635 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 3685 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3636 */ 3686 */
3637 /* include half-width Katakana for JP */ 3687 /* include half-width Katakana for JP */
3638 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3688 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3639 } 3689 }
3640 break; 3690 break;
3691 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3641 case 'c': 3692 case 'c':
3642 case 'z': 3693 case 'z':
3643 /* include ASCII for CN */ 3694 /* include ASCII for CN */
3644 sa->addRange(sa->set, 0, 0x7f); 3695 sa->addRange(sa->set, 0, 0x7f);
3645 break; 3696 break;
3646 case 'k': 3697 case 'k':
3647 /* there is only one converter for KR, and it is not in the myConverterArray[] */ 3698 /* there is only one converter for KR, and it is not in the myConverterArray[] */
3648 cnvData->currentConverter->sharedData->impl->getUnicodeSet( 3699 cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3649 cnvData->currentConverter, sa, which, pErrorCode); 3700 cnvData->currentConverter, sa, which, pErrorCode);
3650 /* the loop over myConverterArray[] will simply not find another converter */ 3701 /* the loop over myConverterArray[] will simply not find another converter */
3651 break; 3702 break;
3703 #endif
3652 default: 3704 default:
3653 break; 3705 break;
3654 } 3706 }
3655 3707
3656 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem ent ucnv_getUnicodeSet() with reverse fallbacks. */ 3708 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem ent ucnv_getUnicodeSet() with reverse fallbacks. */
3657 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3709 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3658 cnvData->version==0 && i==CNS_11643 3710 cnvData->version==0 && i==CNS_11643
3659 ) { 3711 ) {
3660 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */ 3712 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
3661 ucnv_MBCSGetUnicodeSetForBytes( 3713 ucnv_MBCSGetUnicodeSetForBytes(
3662 cnvData->myConverterArray[i], 3714 cnvData->myConverterArray[i],
3663 sa, UCNV_ROUNDTRIP_SET, 3715 sa, UCNV_ROUNDTRIP_SET,
3664 0, 0x81, 0x82, 3716 0, 0x81, 0x82,
3665 pErrorCode); 3717 pErrorCode);
3666 } 3718 }
3667 #endif 3719 #endif
3668 3720
3669 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 3721 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
3670 UConverterSetFilter filter; 3722 UConverterSetFilter filter;
3671 if(cnvData->myConverterArray[i]!=NULL) { 3723 if(cnvData->myConverterArray[i]!=NULL) {
3672 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3724 if(cnvData->locale[0]=='j' && i==JISX208) {
3673 cnvData->version==0 && i==CNS_11643 3725 /*
3674 ) { 3726 * Only add code points that map to Shift-JIS codes
3727 * corresponding to JIS X 0208.
3728 */
3729 filter=UCNV_SET_FILTER_SJIS;
3730 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3731 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3732 cnvData->version==0 && i==CNS_11643) {
3675 /* 3733 /*
3676 * Version-specific for CN: 3734 * Version-specific for CN:
3677 * CN version 0 does not map CNS planes 3..7 although 3735 * CN version 0 does not map CNS planes 3..7 although
3678 * they are all available in the CNS conversion table; 3736 * they are all available in the CNS conversion table;
3679 * CN version 1 (-EXT) does map them all. 3737 * CN version 1 (-EXT) does map them all.
3680 * The two versions create different Unicode sets. 3738 * The two versions create different Unicode sets.
3681 */ 3739 */
3682 filter=UCNV_SET_FILTER_2022_CN; 3740 filter=UCNV_SET_FILTER_2022_CN;
3683 } else if(cnvData->locale[0]=='j' && i==JISX208) {
3684 /*
3685 * Only add code points that map to Shift-JIS codes
3686 * corresponding to JIS X 0208.
3687 */
3688 filter=UCNV_SET_FILTER_SJIS;
3689 } else if(i==KSC5601) { 3741 } else if(i==KSC5601) {
3690 /* 3742 /*
3691 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables) 3743 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
3692 * are broader than GR94. 3744 * are broader than GR94.
3693 */ 3745 */
3694 filter=UCNV_SET_FILTER_GR94DBCS; 3746 filter=UCNV_SET_FILTER_GR94DBCS;
3747 #endif
3695 } else { 3748 } else {
3696 filter=UCNV_SET_FILTER_NONE; 3749 filter=UCNV_SET_FILTER_NONE;
3697 } 3750 }
3698 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode); 3751 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode);
3699 } 3752 }
3700 } 3753 }
3701 3754
3702 /* 3755 /*
3703 * ISO 2022 converters must not convert SO/SI/ESC despite what 3756 * ISO 2022 converters must not convert SO/SI/ESC despite what
3704 * sub-converters do by themselves. 3757 * sub-converters do by themselves.
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
3822 NULL, 3875 NULL,
3823 NULL, 3876 NULL,
3824 &_ISO2022JPStaticData, 3877 &_ISO2022JPStaticData,
3825 FALSE, 3878 FALSE,
3826 &_ISO2022JPImpl, 3879 &_ISO2022JPImpl,
3827 0, UCNV_MBCS_TABLE_INITIALIZER 3880 0, UCNV_MBCS_TABLE_INITIALIZER
3828 }; 3881 };
3829 3882
3830 } // namespace 3883 } // namespace
3831 3884
3885 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3832 /************* KR ***************/ 3886 /************* KR ***************/
3833 static const UConverterImpl _ISO2022KRImpl={ 3887 static const UConverterImpl _ISO2022KRImpl={
3834 UCNV_ISO_2022, 3888 UCNV_ISO_2022,
3835 3889
3836 NULL, 3890 NULL,
3837 NULL, 3891 NULL,
3838 3892
3839 _ISO2022Open, 3893 _ISO2022Open,
3840 _ISO2022Close, 3894 _ISO2022Close,
3841 _ISO2022Reset, 3895 _ISO2022Reset,
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
3938 ~((uint32_t) 0), 3992 ~((uint32_t) 0),
3939 NULL, 3993 NULL,
3940 NULL, 3994 NULL,
3941 &_ISO2022CNStaticData, 3995 &_ISO2022CNStaticData,
3942 FALSE, 3996 FALSE,
3943 &_ISO2022CNImpl, 3997 &_ISO2022CNImpl,
3944 0, UCNV_MBCS_TABLE_INITIALIZER 3998 0, UCNV_MBCS_TABLE_INITIALIZER
3945 }; 3999 };
3946 4000
3947 } // namespace 4001 } // namespace
4002 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
3948 4003
3949 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 4004 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
OLDNEW
« no previous file with comments | « scripts/sjis_gen.sh ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698