Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(473)

Side by Side Diff: source/common/ucnv2022.cpp

Issue 587833004: Turn on UCONFIG_NO_NON_HTML5_CONVERTER to save 100kB (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/icu52/
Patch Set: more tests added to desc Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « scripts/ibm866_gen.sh ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2000-2012, International Business Machines 3 * Copyright (C) 2000-2012, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 * file name: ucnv2022.cpp 6 * file name: ucnv2022.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 CNS_11643_1, 147 CNS_11643_1,
148 CNS_11643_2, 148 CNS_11643_2,
149 CNS_11643_3, 149 CNS_11643_3,
150 CNS_11643_4, 150 CNS_11643_4,
151 CNS_11643_5, 151 CNS_11643_5,
152 CNS_11643_6, 152 CNS_11643_6,
153 CNS_11643_7 153 CNS_11643_7
154 } StateEnum; 154 } StateEnum;
155 155
156 /* is the StateEnum charset value for a DBCS charset? */ 156 /* is the StateEnum charset value for a DBCS charset? */
157 #if UCONFIG_NO_NON_HTML5_CONVERSION
158 #define IS_JP_DBCS(cs) (JISX208==(cs))
159 #else
157 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 160 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
161 #endif
158 162
159 #define CSM(cs) ((uint16_t)1<<(cs)) 163 #define CSM(cs) ((uint16_t)1<<(cs))
160 164
161 /* 165 /*
162 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence 166 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
163 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x 167 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
164 * 168 *
165 * Note: The converter uses some leniency: 169 * Note: The converter uses some leniency:
166 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in 170 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
167 * all versions, not just JIS7 and JIS8. 171 * all versions, not just JIS7 and JIS8.
168 * - ICU does not distinguish between different versions of JIS X 0208. 172 * - ICU does not distinguish between different versions of JIS X 0208.
169 */ 173 */
170 #if UCONFIG_NO_NON_HTML5_CONVERSION 174 #if UCONFIG_NO_NON_HTML5_CONVERSION
171 enum { MAX_JA_VERSION=0 }; 175 enum { MAX_JA_VERSION=0 };
172 #else 176 #else
173 enum { MAX_JA_VERSION=4 }; 177 enum { MAX_JA_VERSION=4 };
174 #endif 178 #endif
175 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 179 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
180 /*
181 * TODO(jshin): The encoding spec has JISX212, but we don't support it.
182 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
183 */
176 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
177 #if !UCONFIG_NO_NON_HTML5_CONVERSION 185 #if !UCONFIG_NO_NON_HTML5_CONVERSION
178 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
179 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
180 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 188 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
181 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 189 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
182 #endif 190 #endif
183 }; 191 };
184 192
185 typedef enum { 193 typedef enum {
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
359 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_ 2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 367 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_ 2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
360 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 368 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
361 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T ERMINAL_2022 ,VALID_TERMINAL_2022 369 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T ERMINAL_2022 ,VALID_TERMINAL_2022
362 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
363 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
364 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
365 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 373 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
366 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 374 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
367 }; 375 };
368 376
369
370 /* Enable ISO-2022-{KR,CN,CN-Ext} for now.
371 * TODO(jshin): Disable it when we know what to do about 'replacement'
372 * encodings. See http://crbug.com/277037 and
373 * https://codereview.chromium.org/145973021/
374 */
375 #ifndef U_ENABLE_ISO_2022_KR_CN
376 #define U_ENABLE_ISO_2022_KR_CN 1
377 #endif
378
379 /* Type def for refactoring changeState_2022 code*/ 377 /* Type def for refactoring changeState_2022 code*/
380 typedef enum{ 378 typedef enum{
381 #ifdef U_ENABLE_GENERIC_ISO_2022 379 #ifdef U_ENABLE_GENERIC_ISO_2022
382 ISO_2022=0, 380 ISO_2022=0,
383 #endif 381 #endif
382 #if UCONFIG_NO_NON_HTML5_CONVERSION
383 ISO_2022_JP=1
384 #else
384 ISO_2022_JP=1, 385 ISO_2022_JP=1,
385 #ifdef U_ENABLE_ISO_2022_KR_CN
386 ISO_2022_KR=2, 386 ISO_2022_KR=2,
387 ISO_2022_CN=3 387 ISO_2022_CN=3
388 #endif 388 #endif
389 } Variant2022; 389 } Variant2022;
390 390
391 /*********** ISO 2022 Converter Protos ***********/ 391 /*********** ISO 2022 Converter Protos ***********/
392 static void 392 static void
393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
394 394
395 static void 395 static void
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
510 #endif 510 #endif
511 myConverterData->myConverterArray[JISX208] = 511 myConverterData->myConverterArray[JISX208] =
512 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code); 512 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code);
513 #if !UCONFIG_NO_NON_HTML5_CONVERSION 513 #if !UCONFIG_NO_NON_HTML5_CONVERSION
514 if(jpCharsetMasks[version]&CSM(JISX212)) { 514 if(jpCharsetMasks[version]&CSM(JISX212)) {
515 myConverterData->myConverterArray[JISX212] = 515 myConverterData->myConverterArray[JISX212] =
516 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode); 516 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode);
517 } 517 }
518 if(jpCharsetMasks[version]&CSM(GB2312)) { 518 if(jpCharsetMasks[version]&CSM(GB2312)) {
519 myConverterData->myConverterArray[GB2312] = 519 myConverterData->myConverterArray[GB2312] =
520 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackAr gs, errorCode); /* gb_2312_80-1 */ 520 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */
521 } 521 }
522 if(jpCharsetMasks[version]&CSM(KSC5601)) { 522 if(jpCharsetMasks[version]&CSM(KSC5601)) {
523 myConverterData->myConverterArray[KSC5601] = 523 myConverterData->myConverterArray[KSC5601] =
524 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode); 524 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode);
525 } 525 }
526 #endif 526 #endif
527 527
528 /* set the function pointers to appropriate functions */ 528 /* set the function pointers to appropriate functions */
529 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 529 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
530 uprv_strcpy(myConverterData->locale,"ja"); 530 uprv_strcpy(myConverterData->locale,"ja");
531 531
532 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= "); 532 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= ");
533 len = uprv_strlen(myConverterData->name); 533 len = uprv_strlen(myConverterData->name);
534 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ; 534 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ;
535 myConverterData->name[len+1]='\0'; 535 myConverterData->name[len+1]='\0';
536 } 536 }
537 #ifdef U_ENABLE_ISO_2022_KR_CN 537 #if !UCONFIG_NO_NON_HTML5_CONVERSION
538 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 538 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
539 (myLocale[2]=='_' || myLocale[2]=='\0')) 539 (myLocale[2]=='_' || myLocale[2]=='\0'))
540 { 540 {
541 const char *cnvName; 541 const char *cnvName;
542 if(version==1) { 542 if(version==1) {
543 cnvName="icu-internal-25546"; 543 cnvName="icu-internal-25546";
544 } else { 544 } else {
545 cnvName="ibm-949"; 545 cnvName="ibm-949";
546 myConverterData->version=version=0; 546 myConverterData->version=version=0;
547 } 547 }
(...skipping 25 matching lines...) Expand all
573 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; 573 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
574 uprv_strcpy(myConverterData->locale,"ko"); 574 uprv_strcpy(myConverterData->locale,"ko");
575 } 575 }
576 } 576 }
577 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m yLocale[1]=='n'))&& 577 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m yLocale[1]=='n'))&&
578 (myLocale[2]=='_' || myLocale[2]=='\0')) 578 (myLocale[2]=='_' || myLocale[2]=='\0'))
579 { 579 {
580 580
581 /* open the required converters and cache them */ 581 /* open the required converters and cache them */
582 myConverterData->myConverterArray[GB2312_1] = 582 myConverterData->myConverterArray[GB2312_1] =
583 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); 583 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorC ode);
584 if(version==1) { 584 if(version==1) {
585 myConverterData->myConverterArray[ISO_IR_165] = 585 myConverterData->myConverterArray[ISO_IR_165] =
586 ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackA rgs, errorCode); 586 ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
587 } 587 }
588 myConverterData->myConverterArray[CNS_11643] = 588 myConverterData->myConverterArray[CNS_11643] =
589 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode); 589 ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
590 590
591 591
592 /* set the function pointers to appropriate functions */ 592 /* set the function pointers to appropriate functions */
593 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; 593 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
594 uprv_strcpy(myConverterData->locale,"cn"); 594 uprv_strcpy(myConverterData->locale,"cn");
595 595
596 if (version==0){ 596 if (version==0){
597 myConverterData->version = 0; 597 myConverterData->version = 0;
598 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0"); 598 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0");
599 }else if (version==1){ 599 }else if (version==1){
600 myConverterData->version = 1; 600 myConverterData->version = 1;
601 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1"); 601 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1");
602 }else { 602 }else {
603 myConverterData->version = 2; 603 myConverterData->version = 2;
604 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2"); 604 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2");
605 } 605 }
606 } 606 }
607 #endif // U_ENABLE_ISO_2022_KR_CN 607 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION
608 else{ 608 else{
609 #ifdef U_ENABLE_GENERIC_ISO_2022 609 #ifdef U_ENABLE_GENERIC_ISO_2022
610 myConverterData->isFirstBuffer = TRUE; 610 myConverterData->isFirstBuffer = TRUE;
611 611
612 /* append the UTF-8 escape sequence */ 612 /* append the UTF-8 escape sequence */
613 cnv->charErrorBufferLength = 3; 613 cnv->charErrorBufferLength = 3;
614 cnv->charErrorBuffer[0] = 0x1b; 614 cnv->charErrorBuffer[0] = 0x1b;
615 cnv->charErrorBuffer[1] = 0x25; 615 cnv->charErrorBuffer[1] = 0x25;
616 cnv->charErrorBuffer[2] = 0x42; 616 cnv->charErrorBuffer[2] = 0x42;
617 617
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
732 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 732 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
733 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE 733 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE
734 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 734 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
735 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE 735 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE
736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
738 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 738 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
739 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 739 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
740 }; 740 };
741 741
742 #if !UCONFIG_NO_NON_HTML5_CONVERSION
742 /*************** to unicode *******************/ 743 /*************** to unicode *******************/
743 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 744 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
744 /* 0 1 2 3 4 5 6 7 8 9 */ 745 /* 0 1 2 3 4 5 6 7 8 9 */
745 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 746 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
746 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 749 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
749 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
750 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 751 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
751 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 752 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
752 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 753 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
753 }; 754 };
755 #endif
754 756
755 757
756 static UCNV_TableStates_2022 758 static UCNV_TableStates_2022
757 getKey_2022(char c,int32_t* key,int32_t* offset){ 759 getKey_2022(char c,int32_t* key,int32_t* offset){
758 int32_t togo; 760 int32_t togo;
759 int32_t low = 0; 761 int32_t low = 0;
760 int32_t hi = MAX_STATES_2022; 762 int32_t hi = MAX_STATES_2022;
761 int32_t oldmid=0; 763 int32_t oldmid=0;
762 764
763 togo = normalize_esq_chars_2022[(uint8_t)c]; 765 togo = normalize_esq_chars_2022[(uint8_t)c];
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
896 if(myData2022->toU2022State.g<2) { 898 if(myData2022->toU2022State.g<2) {
897 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g; 899 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;
898 } 900 }
899 myData2022->toU2022State.g=2; 901 myData2022->toU2022State.g=2;
900 } else { 902 } else {
901 /* illegal to have SS2 before a matching designator */ 903 /* illegal to have SS2 before a matching designator */
902 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 904 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
903 } 905 }
904 break; 906 break;
905 /* case SS3_STATE: not used in ISO-2022-JP-x */ 907 /* case SS3_STATE: not used in ISO-2022-JP-x */
908 #if !UCONFIG_NO_NON_HTML5_CONVERSION
906 case ISO8859_1: 909 case ISO8859_1:
907 case ISO8859_7: 910 case ISO8859_7:
908 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 911 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
909 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 912 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
910 } else { 913 } else {
911 /* G2 charset for SS2 */ 914 /* G2 charset for SS2 */
912 myData2022->toU2022State.cs[2]=(int8_t)tempState; 915 myData2022->toU2022State.cs[2]=(int8_t)tempState;
913 } 916 }
914 break; 917 break;
918 #endif
915 default: 919 default:
916 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 920 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
917 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 921 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
918 } else { 922 } else {
919 /* G0 charset */ 923 /* G0 charset */
920 myData2022->toU2022State.cs[0]=(int8_t)tempState; 924 myData2022->toU2022State.cs[0]=(int8_t)tempState;
921 } 925 }
922 break; 926 break;
923 } 927 }
924 } 928 }
925 break; 929 break;
930 #if !UCONFIG_NO_NON_HTML5_CONVERSION
926 case ISO_2022_CN: 931 case ISO_2022_CN:
927 { 932 {
928 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 933 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
929 switch(tempState) { 934 switch(tempState) {
930 case INVALID_STATE: 935 case INVALID_STATE:
931 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 936 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
932 break; 937 break;
933 case SS2_STATE: 938 case SS2_STATE:
934 if(myData2022->toU2022State.cs[2]!=0) { 939 if(myData2022->toU2022State.cs[2]!=0) {
935 if(myData2022->toU2022State.g<2) { 940 if(myData2022->toU2022State.g<2) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
977 } 982 }
978 } 983 }
979 break; 984 break;
980 case ISO_2022_KR: 985 case ISO_2022_KR:
981 if(offset==0x30){ 986 if(offset==0x30){
982 /* nothing to be done, just accept this one escape sequence */ 987 /* nothing to be done, just accept this one escape sequence */
983 } else { 988 } else {
984 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 989 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
985 } 990 }
986 break; 991 break;
992 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
987 993
988 default: 994 default:
989 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 995 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
990 break; 996 break;
991 } 997 }
992 } 998 }
993 if(U_SUCCESS(*err)) { 999 if(U_SUCCESS(*err)) {
994 _this->toULength = 0; 1000 _this->toULength = 0;
995 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { 1001 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
996 if(_this->toULength>1) { 1002 if(_this->toULength>1) {
(...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after
1397 * KSC5601 : alias to ibm-949 mapping table 1403 * KSC5601 : alias to ibm-949 mapping table
1398 * GB2312 : alias to ibm-1386 mapping table 1404 * GB2312 : alias to ibm-1386 mapping table
1399 * ISO-8859-1 : Algorithmic implemented as LATIN1 case 1405 * ISO-8859-1 : Algorithmic implemented as LATIN1 case
1400 * ISO-8859-7 : alias to ibm-9409 mapping table 1406 * ISO-8859-7 : alias to ibm-9409 mapping table
1401 */ 1407 */
1402 1408
1403 /* preference order of JP charsets */ 1409 /* preference order of JP charsets */
1404 static const StateEnum jpCharsetPref[]={ 1410 static const StateEnum jpCharsetPref[]={
1405 ASCII, 1411 ASCII,
1406 JISX201, 1412 JISX201,
1413 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1407 ISO8859_1, 1414 ISO8859_1,
1408 ISO8859_7, 1415 ISO8859_7,
1416 #endif
1409 JISX208, 1417 JISX208,
1418 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1410 JISX212, 1419 JISX212,
1411 GB2312, 1420 GB2312,
1412 KSC5601, 1421 KSC5601,
1422 #endif
1413 HWKANA_7BIT 1423 HWKANA_7BIT
1414 }; 1424 };
1415 1425
1416 /* 1426 /*
1417 * The escape sequences must be in order of the enum constants like JISX201 = 3, 1427 * The escape sequences must be in order of the enum constants like JISX201 = 3,
1418 * not in order of jpCharsetPref[]! 1428 * not in order of jpCharsetPref[]!
1419 */ 1429 */
1420 static const char escSeqChars[][6] ={ 1430 static const char escSeqChars[][6] ={
1421 "\x1B\x28\x42", /* <ESC>(B ASCII */ 1431 "\x1B\x28\x42", /* <ESC>(B ASCII */
1422 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ 1432 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after
1772 int8_t cs0 = choices[i]; 1782 int8_t cs0 = choices[i];
1773 switch(cs0) { 1783 switch(cs0) {
1774 case ASCII: 1784 case ASCII:
1775 if(sourceChar <= 0x7f) { 1785 if(sourceChar <= 0x7f) {
1776 targetValue = (uint32_t)sourceChar; 1786 targetValue = (uint32_t)sourceChar;
1777 len = 1; 1787 len = 1;
1778 cs = cs0; 1788 cs = cs0;
1779 g = 0; 1789 g = 0;
1780 } 1790 }
1781 break; 1791 break;
1792 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1782 case ISO8859_1: 1793 case ISO8859_1:
1783 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 1794 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1784 targetValue = (uint32_t)sourceChar - 0x80; 1795 targetValue = (uint32_t)sourceChar - 0x80;
1785 len = 1; 1796 len = 1;
1786 cs = cs0; 1797 cs = cs0;
1787 g = 2; 1798 g = 2;
1788 } 1799 }
1789 break; 1800 break;
1801 #endif
1790 case HWKANA_7BIT: 1802 case HWKANA_7BIT:
1791 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) { 1803 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) {
1792 if(converterData->version==3) { 1804 if(converterData->version==3) {
1793 /* JIS7: use G1 (SO) */ 1805 /* JIS7: use G1 (SO) */
1794 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ 1806 /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1795 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); 1807 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
1796 len = 1; 1808 len = 1;
1797 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ 1809 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
1798 g = 1; 1810 g = 1;
1799 } else if(converterData->version==4) { 1811 } else if(converterData->version==4) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1841 } 1853 }
1842 } else if(len == 0 && useFallback && 1854 } else if(len == 0 && useFallback &&
1843 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) { 1855 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) {
1844 targetValue = hwkana_fb[sourceChar - HWKANA_START]; 1856 targetValue = hwkana_fb[sourceChar - HWKANA_START];
1845 len = -2; 1857 len = -2;
1846 cs = cs0; 1858 cs = cs0;
1847 g = 0; 1859 g = 0;
1848 useFallback = FALSE; 1860 useFallback = FALSE;
1849 } 1861 }
1850 break; 1862 break;
1863 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1851 case ISO8859_7: 1864 case ISO8859_7:
1852 /* G0 SBCS forced to 7-bit output */ 1865 /* G0 SBCS forced to 7-bit output */
1853 len2 = MBCS_SINGLE_FROM_UCHAR32( 1866 len2 = MBCS_SINGLE_FROM_UCHAR32(
1854 converterData->myConverterArray[cs0], 1867 converterData->myConverterArray[cs0],
1855 sourceChar, &value, 1868 sourceChar, &value,
1856 useFallback); 1869 useFallback);
1857 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) { 1870 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) {
1858 targetValue = value - 0x80; 1871 targetValue = value - 0x80;
1859 len = len2; 1872 len = len2;
1860 cs = cs0; 1873 cs = cs0;
1861 g = 2; 1874 g = 2;
1862 useFallback = FALSE; 1875 useFallback = FALSE;
1863 } 1876 }
1864 break; 1877 break;
1878 #endif
1865 default: 1879 default:
1866 /* G0 DBCS */ 1880 /* G0 DBCS */
1867 len2 = MBCS_FROM_UCHAR32_ISO2022( 1881 len2 = MBCS_FROM_UCHAR32_ISO2022(
1868 converterData->myConverterArray[cs0], 1882 converterData->myConverterArray[cs0],
1869 sourceChar, &value, 1883 sourceChar, &value,
1870 useFallback, MBCS_OUTPUT_2); 1884 useFallback, MBCS_OUTPUT_2);
1871 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1885 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1886 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1872 if(cs0 == KSC5601) { 1887 if(cs0 == KSC5601) {
1873 /* 1888 /*
1874 * Check for valid bytes for the encoding scheme. 1889 * Check for valid bytes for the encoding scheme.
1875 * This is necessary because the sub-converter (windows-949) 1890 * This is necessary because the sub-converter (windows-949)
1876 * has a broader encoding scheme than is valid for 2022. 1891 * has a broader encoding scheme than is valid for 2022.
1877 */ 1892 */
1878 value = _2022FromGR94DBCS(value); 1893 value = _2022FromGR94DBCS(value);
1879 if(value == 0) { 1894 if(value == 0) {
1880 break; 1895 break;
1881 } 1896 }
1882 } 1897 }
1898 #endif
1883 targetValue = value; 1899 targetValue = value;
1884 len = len2; 1900 len = len2;
1885 cs = cs0; 1901 cs = cs0;
1886 g = 0; 1902 g = 0;
1887 useFallback = FALSE; 1903 useFallback = FALSE;
1888 } 1904 }
1889 break; 1905 break;
1890 } 1906 }
1891 } 1907 }
1892 1908
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
2166 /* return from a single-shift state to the previous one */ 2182 /* return from a single-shift state to the previous one */
2167 if(pToU2022State->g >= 2) { 2183 if(pToU2022State->g >= 2) {
2168 pToU2022State->g=pToU2022State->prevG; 2184 pToU2022State->g=pToU2022State->prevG;
2169 } 2185 }
2170 } else switch(cs) { 2186 } else switch(cs) {
2171 case ASCII: 2187 case ASCII:
2172 if(mySourceChar <= 0x7f) { 2188 if(mySourceChar <= 0x7f) {
2173 targetUniChar = mySourceChar; 2189 targetUniChar = mySourceChar;
2174 } 2190 }
2175 break; 2191 break;
2192 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2176 case ISO8859_1: 2193 case ISO8859_1:
2177 if(mySourceChar <= 0x7f) { 2194 if(mySourceChar <= 0x7f) {
2178 targetUniChar = mySourceChar + 0x80; 2195 targetUniChar = mySourceChar + 0x80;
2179 } 2196 }
2180 /* return from a single-shift state to the previous one */ 2197 /* return from a single-shift state to the previous one */
2181 pToU2022State->g=pToU2022State->prevG; 2198 pToU2022State->g=pToU2022State->prevG;
2182 break; 2199 break;
2183 case ISO8859_7: 2200 case ISO8859_7:
2184 if(mySourceChar <= 0x7f) { 2201 if(mySourceChar <= 0x7f) {
2185 /* convert mySourceChar+0x80 to use a normal 8-bit table */ 2202 /* convert mySourceChar+0x80 to use a normal 8-bit table */
2186 targetUniChar = 2203 targetUniChar =
2187 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( 2204 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
2188 myData->myConverterArray[cs], 2205 myData->myConverterArray[cs],
2189 mySourceChar + 0x80); 2206 mySourceChar + 0x80);
2190 } 2207 }
2191 /* return from a single-shift state to the previous one */ 2208 /* return from a single-shift state to the previous one */
2192 pToU2022State->g=pToU2022State->prevG; 2209 pToU2022State->g=pToU2022State->prevG;
2193 break; 2210 break;
2211 #endif
2194 case JISX201: 2212 case JISX201:
2195 if(mySourceChar <= 0x7f) { 2213 if(mySourceChar <= 0x7f) {
2196 targetUniChar = jisx201ToU(mySourceChar); 2214 targetUniChar = jisx201ToU(mySourceChar);
2197 } 2215 }
2198 break; 2216 break;
2199 case HWKANA_7BIT: 2217 case HWKANA_7BIT:
2200 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { 2218 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
2201 /* 7-bit halfwidth Katakana */ 2219 /* 7-bit halfwidth Katakana */
2202 targetUniChar = mySourceChar + (HWKANA_START - 0x21); 2220 targetUniChar = mySourceChar + (HWKANA_START - 0x21);
2203 } 2221 }
(...skipping 19 matching lines...) Expand all
2223 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ; 2241 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ;
2224 if (leadIsOk && trailIsOk) { 2242 if (leadIsOk && trailIsOk) {
2225 ++mySource; 2243 ++mySource;
2226 tmpSourceChar = (mySourceChar << 8) | trailByte; 2244 tmpSourceChar = (mySourceChar << 8) | trailByte;
2227 if(cs == JISX208) { 2245 if(cs == JISX208) {
2228 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf); 2246 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf);
2229 mySourceChar = tmpSourceChar; 2247 mySourceChar = tmpSourceChar;
2230 } else { 2248 } else {
2231 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ 2249 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
2232 mySourceChar = tmpSourceChar; 2250 mySourceChar = tmpSourceChar;
2251 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2233 if (cs == KSC5601) { 2252 if (cs == KSC5601) {
2234 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */ 2253 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */
2235 } 2254 }
2255 #endif
2236 tempBuf[0] = (char)(tmpSourceChar >> 8); 2256 tempBuf[0] = (char)(tmpSourceChar >> 8);
2237 tempBuf[1] = (char)(tmpSourceChar); 2257 tempBuf[1] = (char)(tmpSourceChar);
2238 } 2258 }
2239 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE); 2259 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE);
2240 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 2260 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
2241 /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 2261 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
2242 ++mySource; 2262 ++mySource;
2243 /* add another bit so that the code below writes 2 bytes in case of error */ 2263 /* add another bit so that the code below writes 2 bytes in case of error */
2244 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte; 2264 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte;
2245 } 2265 }
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2287 *err =U_BUFFER_OVERFLOW_ERROR; 2307 *err =U_BUFFER_OVERFLOW_ERROR;
2288 break; 2308 break;
2289 } 2309 }
2290 } 2310 }
2291 endloop: 2311 endloop:
2292 args->target = myTarget; 2312 args->target = myTarget;
2293 args->source = mySource; 2313 args->source = mySource;
2294 } 2314 }
2295 2315
2296 2316
2317 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2297 /*************************************************************** 2318 /***************************************************************
2298 * Rules for ISO-2022-KR encoding 2319 * Rules for ISO-2022-KR encoding
2299 * i) The KSC5601 designator sequence should appear only once in a file, 2320 * i) The KSC5601 designator sequence should appear only once in a file,
2300 * at the beginning of a line before any KSC5601 characters. This usually 2321 * at the beginning of a line before any KSC5601 characters. This usually
2301 * means that it appears by itself on the first line of the file 2322 * means that it appears by itself on the first line of the file
2302 * ii) There are only 2 shifting sequences SO to shift into double byte mode 2323 * ii) There are only 2 shifting sequences SO to shift into double byte mode
2303 * and SI to shift into single byte mode 2324 * and SI to shift into single byte mode
2304 */ 2325 */
2305 static void 2326 static void
2306 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2327 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after
3430 } 3451 }
3431 else{ 3452 else{
3432 *err =U_BUFFER_OVERFLOW_ERROR; 3453 *err =U_BUFFER_OVERFLOW_ERROR;
3433 break; 3454 break;
3434 } 3455 }
3435 } 3456 }
3436 endloop: 3457 endloop:
3437 args->target = myTarget; 3458 args->target = myTarget;
3438 args->source = mySource; 3459 args->source = mySource;
3439 } 3460 }
3461 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
3440 3462
3441 static void 3463 static void
3442 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) { 3464 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) {
3443 UConverter *cnv = args->converter; 3465 UConverter *cnv = args->converter;
3444 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo; 3466 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo;
3445 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; 3467 ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3446 char *p, *subchar; 3468 char *p, *subchar;
3447 char buffer[8]; 3469 char buffer[8];
3448 int32_t length; 3470 int32_t length;
3449 3471
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
3631 #endif 3653 #endif
3632 3654
3633 cnvData = (UConverterDataISO2022*)cnv->extraInfo; 3655 cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3634 3656
3635 /* open a set and initialize it with code points that are algorithmically round-tripped */ 3657 /* open a set and initialize it with code points that are algorithmically round-tripped */
3636 switch(cnvData->locale[0]){ 3658 switch(cnvData->locale[0]){
3637 case 'j': 3659 case 'j':
3638 /* include JIS X 0201 which is hardcoded */ 3660 /* include JIS X 0201 which is hardcoded */
3639 sa->add(sa->set, 0xa5); 3661 sa->add(sa->set, 0xa5);
3640 sa->add(sa->set, 0x203e); 3662 sa->add(sa->set, 0x203e);
3663 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3641 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3664 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3642 /* include Latin-1 for some variants of JP */ 3665 /* include Latin-1 for some variants of JP */
3643 sa->addRange(sa->set, 0, 0xff); 3666 sa->addRange(sa->set, 0, 0xff);
3644 } else { 3667 } else {
3645 /* include ASCII for JP */ 3668 /* include ASCII for JP */
3646 sa->addRange(sa->set, 0, 0x7f); 3669 sa->addRange(sa->set, 0, 0x7f);
3647 } 3670 }
3671 #else
3672 /* include ASCII for JP */
3673 sa->addRange(sa->set, 0, 0x7f);
3674 #endif
3648 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) { 3675 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) {
3649 /* 3676 /*
3650 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0 3677 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0
3651 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) 3678 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3652 * use half-width Katakana. 3679 * use half-width Katakana.
3653 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) 3680 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3654 * half-width Katakana via the ESC ( I sequence. 3681 * half-width Katakana via the ESC ( I sequence.
3655 * However, we only emit (fromUnicode) half-width Katakana according to the 3682 * However, we only emit (fromUnicode) half-width Katakana according to the
3656 * definition of each variant. 3683 * definition of each variant.
3657 * 3684 *
3658 * When including fallbacks, 3685 * When including fallbacks,
3659 * we need to include half-width Katakana Unicode code points for all JP variants because 3686 * we need to include half-width Katakana Unicode code points for all JP variants because
3660 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 3687 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3661 */ 3688 */
3662 /* include half-width Katakana for JP */ 3689 /* include half-width Katakana for JP */
3663 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3690 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3664 } 3691 }
3665 break; 3692 break;
3693 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3666 case 'c': 3694 case 'c':
3667 case 'z': 3695 case 'z':
3668 /* include ASCII for CN */ 3696 /* include ASCII for CN */
3669 sa->addRange(sa->set, 0, 0x7f); 3697 sa->addRange(sa->set, 0, 0x7f);
3670 break; 3698 break;
3671 case 'k': 3699 case 'k':
3672 /* there is only one converter for KR, and it is not in the myConverterArray[] */ 3700 /* there is only one converter for KR, and it is not in the myConverterArray[] */
3673 cnvData->currentConverter->sharedData->impl->getUnicodeSet( 3701 cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3674 cnvData->currentConverter, sa, which, pErrorCode); 3702 cnvData->currentConverter, sa, which, pErrorCode);
3675 /* the loop over myConverterArray[] will simply not find another converter */ 3703 /* the loop over myConverterArray[] will simply not find another converter */
3676 break; 3704 break;
3705 #endif
3677 default: 3706 default:
3678 break; 3707 break;
3679 } 3708 }
3680 3709
3681 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem ent ucnv_getUnicodeSet() with reverse fallbacks. */ 3710 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem ent ucnv_getUnicodeSet() with reverse fallbacks. */
3682 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3711 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3683 cnvData->version==0 && i==CNS_11643 3712 cnvData->version==0 && i==CNS_11643
3684 ) { 3713 ) {
3685 /* special handling for non-EXT ISO-2022-CN: add only code point s for CNS planes 1 and 2 */ 3714 /* special handling for non-EXT ISO-2022-CN: add only code point s for CNS planes 1 and 2 */
3686 ucnv_MBCSGetUnicodeSetForBytes( 3715 ucnv_MBCSGetUnicodeSetForBytes(
3687 cnvData->myConverterArray[i], 3716 cnvData->myConverterArray[i],
3688 sa, UCNV_ROUNDTRIP_SET, 3717 sa, UCNV_ROUNDTRIP_SET,
3689 0, 0x81, 0x82, 3718 0, 0x81, 0x82,
3690 pErrorCode); 3719 pErrorCode);
3691 } 3720 }
3692 #endif 3721 #endif
3693 3722
3694 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 3723 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
3695 UConverterSetFilter filter; 3724 UConverterSetFilter filter;
3696 if(cnvData->myConverterArray[i]!=NULL) { 3725 if(cnvData->myConverterArray[i]!=NULL) {
3697 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3726 if(cnvData->locale[0]=='j' && i==JISX208) {
3698 cnvData->version==0 && i==CNS_11643 3727 /*
3699 ) { 3728 * Only add code points that map to Shift-JIS codes
3729 * corresponding to JIS X 0208.
3730 */
3731 filter=UCNV_SET_FILTER_SJIS;
3732 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3733 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3734 cnvData->version==0 && i==CNS_11643) {
3700 /* 3735 /*
3701 * Version-specific for CN: 3736 * Version-specific for CN:
3702 * CN version 0 does not map CNS planes 3..7 although 3737 * CN version 0 does not map CNS planes 3..7 although
3703 * they are all available in the CNS conversion table; 3738 * they are all available in the CNS conversion table;
3704 * CN version 1 (-EXT) does map them all. 3739 * CN version 1 (-EXT) does map them all.
3705 * The two versions create different Unicode sets. 3740 * The two versions create different Unicode sets.
3706 */ 3741 */
3707 filter=UCNV_SET_FILTER_2022_CN; 3742 filter=UCNV_SET_FILTER_2022_CN;
3708 } else if(cnvData->locale[0]=='j' && i==JISX208) {
3709 /*
3710 * Only add code points that map to Shift-JIS codes
3711 * corresponding to JIS X 0208.
3712 */
3713 filter=UCNV_SET_FILTER_SJIS;
3714 } else if(i==KSC5601) { 3743 } else if(i==KSC5601) {
3715 /* 3744 /*
3716 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables) 3745 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
3717 * are broader than GR94. 3746 * are broader than GR94.
3718 */ 3747 */
3719 filter=UCNV_SET_FILTER_GR94DBCS; 3748 filter=UCNV_SET_FILTER_GR94DBCS;
3749 #endif
3720 } else { 3750 } else {
3721 filter=UCNV_SET_FILTER_NONE; 3751 filter=UCNV_SET_FILTER_NONE;
3722 } 3752 }
3723 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode); 3753 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode);
3724 } 3754 }
3725 } 3755 }
3726 3756
3727 /* 3757 /*
3728 * ISO 2022 converters must not convert SO/SI/ESC despite what 3758 * ISO 2022 converters must not convert SO/SI/ESC despite what
3729 * sub-converters do by themselves. 3759 * sub-converters do by themselves.
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
3847 NULL, 3877 NULL,
3848 NULL, 3878 NULL,
3849 &_ISO2022JPStaticData, 3879 &_ISO2022JPStaticData,
3850 FALSE, 3880 FALSE,
3851 &_ISO2022JPImpl, 3881 &_ISO2022JPImpl,
3852 0, UCNV_MBCS_TABLE_INITIALIZER 3882 0, UCNV_MBCS_TABLE_INITIALIZER
3853 }; 3883 };
3854 3884
3855 } // namespace 3885 } // namespace
3856 3886
3887 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3857 /************* KR ***************/ 3888 /************* KR ***************/
3858 static const UConverterImpl _ISO2022KRImpl={ 3889 static const UConverterImpl _ISO2022KRImpl={
3859 UCNV_ISO_2022, 3890 UCNV_ISO_2022,
3860 3891
3861 NULL, 3892 NULL,
3862 NULL, 3893 NULL,
3863 3894
3864 _ISO2022Open, 3895 _ISO2022Open,
3865 _ISO2022Close, 3896 _ISO2022Close,
3866 _ISO2022Reset, 3897 _ISO2022Reset,
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
3963 ~((uint32_t) 0), 3994 ~((uint32_t) 0),
3964 NULL, 3995 NULL,
3965 NULL, 3996 NULL,
3966 &_ISO2022CNStaticData, 3997 &_ISO2022CNStaticData,
3967 FALSE, 3998 FALSE,
3968 &_ISO2022CNImpl, 3999 &_ISO2022CNImpl,
3969 0, UCNV_MBCS_TABLE_INITIALIZER 4000 0, UCNV_MBCS_TABLE_INITIALIZER
3970 }; 4001 };
3971 4002
3972 } // namespace 4003 } // namespace
4004 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
3973 4005
3974 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 4006 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
OLDNEW
« no previous file with comments | « scripts/ibm866_gen.sh ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698