Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(339)

Side by Side Diff: source/common/ucnv2022.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/ucmndata.h ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2000-2014, International Business Machines 3 * Copyright (C) 2000-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 * file name: ucnv2022.cpp 6 * file name: ucnv2022.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
11 * created on: 2000feb03 11 * created on: 2000feb03
12 * created by: Markus W. Scherer 12 * created by: Markus W. Scherer
13 * 13 *
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
68 * This means, for example, that when ISO-8859-7 is designated, the following 68 * This means, for example, that when ISO-8859-7 is designated, the following
69 * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. 69 * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
70 * The ICU ISO-2022 converter does not handle this - and has no information 70 * The ICU ISO-2022 converter does not handle this - and has no information
71 * about which subconverter would have to be shifted vs. which is designed 71 * about which subconverter would have to be shifted vs. which is designed
72 * for 7-bit ISO-2022. 72 * for 7-bit ISO-2022.
73 * 73 *
74 * Markus Scherer 2003-dec-03 74 * Markus Scherer 2003-dec-03
75 */ 75 */
76 #endif 76 #endif
77 77
78 #if !UCONFIG_ONLY_HTML_CONVERSION
78 static const char SHIFT_IN_STR[] = "\x0F"; 79 static const char SHIFT_IN_STR[] = "\x0F";
79 // static const char SHIFT_OUT_STR[] = "\x0E"; 80 // static const char SHIFT_OUT_STR[] = "\x0E";
81 #endif
80 82
81 #define CR 0x0D 83 #define CR 0x0D
82 #define LF 0x0A 84 #define LF 0x0A
83 #define H_TAB 0x09 85 #define H_TAB 0x09
84 #define V_TAB 0x0B 86 #define V_TAB 0x0B
85 #define SPACE 0x20 87 #define SPACE 0x20
86 88
87 enum { 89 enum {
88 HWKANA_START=0xff61, 90 HWKANA_START=0xff61,
89 HWKANA_END=0xff9f 91 HWKANA_END=0xff9f
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 CNS_11643_1, 147 CNS_11643_1,
146 CNS_11643_2, 148 CNS_11643_2,
147 CNS_11643_3, 149 CNS_11643_3,
148 CNS_11643_4, 150 CNS_11643_4,
149 CNS_11643_5, 151 CNS_11643_5,
150 CNS_11643_6, 152 CNS_11643_6,
151 CNS_11643_7 153 CNS_11643_7
152 } StateEnum; 154 } StateEnum;
153 155
154 /* is the StateEnum charset value for a DBCS charset? */ 156 /* is the StateEnum charset value for a DBCS charset? */
155 #if UCONFIG_NO_NON_HTML5_CONVERSION 157 #if UCONFIG_ONLY_HTML_CONVERSION
156 #define IS_JP_DBCS(cs) (JISX208==(cs)) 158 #define IS_JP_DBCS(cs) (JISX208==(cs))
157 #else 159 #else
158 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 160 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
159 #endif 161 #endif
160 162
161 #define CSM(cs) ((uint16_t)1<<(cs)) 163 #define CSM(cs) ((uint16_t)1<<(cs))
162 164
163 /* 165 /*
164 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence 166 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
165 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x 167 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
166 * 168 *
167 * Note: The converter uses some leniency: 169 * Note: The converter uses some leniency:
168 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in 170 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
169 * all versions, not just JIS7 and JIS8. 171 * all versions, not just JIS7 and JIS8.
170 * - ICU does not distinguish between different versions of JIS X 0208. 172 * - ICU does not distinguish between different versions of JIS X 0208.
171 */ 173 */
172 #if UCONFIG_NO_NON_HTML5_CONVERSION 174 #if UCONFIG_ONLY_HTML_CONVERSION
173 enum { MAX_JA_VERSION=0 }; 175 enum { MAX_JA_VERSION=0 };
174 #else 176 #else
175 enum { MAX_JA_VERSION=4 }; 177 enum { MAX_JA_VERSION=4 };
176 #endif 178 #endif
177 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 179 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
178 /*
179 * TODO(jshin): The encoding spec has JISX212, but we don't support it.
180 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
181 */
182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 180 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
183 #if !UCONFIG_NO_NON_HTML5_CONVERSION 181 #if !UCONFIG_ONLY_HTML_CONVERSION
184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 183 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
188 #endif 186 #endif
189 }; 187 };
190 188
191 typedef enum { 189 typedef enum {
192 ASCII1=0, 190 ASCII1=0,
193 LATIN1, 191 LATIN1,
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 368 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 369 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
373 }; 371 };
374 372
375 /* Type def for refactoring changeState_2022 code*/ 373 /* Type def for refactoring changeState_2022 code*/
376 typedef enum{ 374 typedef enum{
377 #ifdef U_ENABLE_GENERIC_ISO_2022 375 #ifdef U_ENABLE_GENERIC_ISO_2022
378 ISO_2022=0, 376 ISO_2022=0,
379 #endif 377 #endif
380 #if UCONFIG_NO_NON_HTML5_CONVERSION
381 ISO_2022_JP=1
382 #else
383 ISO_2022_JP=1, 378 ISO_2022_JP=1,
379 #if !UCONFIG_ONLY_HTML_CONVERSION
384 ISO_2022_KR=2, 380 ISO_2022_KR=2,
385 ISO_2022_CN=3 381 ISO_2022_CN=3
386 #endif 382 #endif
387 } Variant2022; 383 } Variant2022;
388 384
389 /*********** ISO 2022 Converter Protos ***********/ 385 /*********** ISO 2022 Converter Protos ***********/
390 static void 386 static void
391 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 387 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
392 388
393 static void 389 static void
(...skipping 13 matching lines...) Expand all
407 403
408 #ifdef U_ENABLE_GENERIC_ISO_2022 404 #ifdef U_ENABLE_GENERIC_ISO_2022
409 static void 405 static void
410 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UEr rorCode* err); 406 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UEr rorCode* err);
411 #endif 407 #endif
412 408
413 namespace { 409 namespace {
414 410
415 /*const UConverterSharedData _ISO2022Data;*/ 411 /*const UConverterSharedData _ISO2022Data;*/
416 extern const UConverterSharedData _ISO2022JPData; 412 extern const UConverterSharedData _ISO2022JPData;
413
414 #if !UCONFIG_ONLY_HTML_CONVERSION
417 extern const UConverterSharedData _ISO2022KRData; 415 extern const UConverterSharedData _ISO2022KRData;
418 extern const UConverterSharedData _ISO2022CNData; 416 extern const UConverterSharedData _ISO2022CNData;
417 #endif
419 418
420 } // namespace 419 } // namespace
421 420
422 /*************** Converter implementations ******************/ 421 /*************** Converter implementations ******************/
423 422
424 /* The purpose of this function is to get around gcc compiler warnings. */ 423 /* The purpose of this function is to get around gcc compiler warnings. */
425 static inline void 424 static inline void
426 fromUWriteUInt8(UConverter *cnv, 425 fromUWriteUInt8(UConverter *cnv,
427 const char *bytes, int32_t length, 426 const char *bytes, int32_t length,
428 uint8_t **target, const char *targetLimit, 427 uint8_t **target, const char *targetLimit,
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
487 myConverterData->currentType = ASCII1; 486 myConverterData->currentType = ASCII1;
488 cnv->fromUnicodeStatus =FALSE; 487 cnv->fromUnicodeStatus =FALSE;
489 if(pArgs->locale){ 488 if(pArgs->locale){
490 uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)); 489 uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
491 } 490 }
492 version = pArgs->options & UCNV_OPTIONS_VERSION_MASK; 491 version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
493 myConverterData->version = version; 492 myConverterData->version = version;
494 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && 493 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
495 (myLocale[2]=='_' || myLocale[2]=='\0')) 494 (myLocale[2]=='_' || myLocale[2]=='\0'))
496 { 495 {
497 size_t len=0;
498 /* open the required converters and cache them */ 496 /* open the required converters and cache them */
499 if(version>MAX_JA_VERSION) { 497 if(version>MAX_JA_VERSION) {
500 /* prevent indexing beyond jpCharsetMasks[] */ 498 // ICU 55 fails to open a converter for an unsupported version.
501 myConverterData->version = version = 0; 499 // Previously, it fell back to version 0, but that would yield
500 // unexpected behavior.
501 *errorCode = U_MISSING_RESOURCE_ERROR;
502 return;
502 } 503 }
503 #if !UCONFIG_NO_NON_HTML5_CONVERSION
504 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 504 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
505 myConverterData->myConverterArray[ISO8859_7] = 505 myConverterData->myConverterArray[ISO8859_7] =
506 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode); 506 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode);
507 } 507 }
508 #endif
509 myConverterData->myConverterArray[JISX208] = 508 myConverterData->myConverterArray[JISX208] =
510 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code); 509 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code);
511 #if !UCONFIG_NO_NON_HTML5_CONVERSION
512 if(jpCharsetMasks[version]&CSM(JISX212)) { 510 if(jpCharsetMasks[version]&CSM(JISX212)) {
513 myConverterData->myConverterArray[JISX212] = 511 myConverterData->myConverterArray[JISX212] =
514 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode); 512 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode);
515 } 513 }
516 if(jpCharsetMasks[version]&CSM(GB2312)) { 514 if(jpCharsetMasks[version]&CSM(GB2312)) {
517 myConverterData->myConverterArray[GB2312] = 515 myConverterData->myConverterArray[GB2312] =
518 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */ 516 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */
519 } 517 }
520 if(jpCharsetMasks[version]&CSM(KSC5601)) { 518 if(jpCharsetMasks[version]&CSM(KSC5601)) {
521 myConverterData->myConverterArray[KSC5601] = 519 myConverterData->myConverterArray[KSC5601] =
522 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode); 520 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode);
523 } 521 }
524 #endif
525 522
526 /* set the function pointers to appropriate funtions */ 523 /* set the function pointers to appropriate funtions */
527 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 524 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
528 uprv_strcpy(myConverterData->locale,"ja"); 525 uprv_strcpy(myConverterData->locale,"ja");
529 526
530 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= "); 527 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= ");
531 len = uprv_strlen(myConverterData->name); 528 size_t len = uprv_strlen(myConverterData->name);
532 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ; 529 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ;
533 myConverterData->name[len+1]='\0'; 530 myConverterData->name[len+1]='\0';
534 } 531 }
535 #if !UCONFIG_NO_NON_HTML5_CONVERSION 532 #if !UCONFIG_ONLY_HTML_CONVERSION
536 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 533 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
537 (myLocale[2]=='_' || myLocale[2]=='\0')) 534 (myLocale[2]=='_' || myLocale[2]=='\0'))
538 { 535 {
536 if(version>1) {
537 // ICU 55 fails to open a converter for an unsupported version.
538 // Previously, it fell back to version 0, but that would yield
539 // unexpected behavior.
540 *errorCode = U_MISSING_RESOURCE_ERROR;
541 return;
542 }
539 const char *cnvName; 543 const char *cnvName;
540 if(version==1) { 544 if(version==1) {
541 cnvName="icu-internal-25546"; 545 cnvName="icu-internal-25546";
542 } else { 546 } else {
543 cnvName="ibm-949"; 547 cnvName="ibm-949";
544 myConverterData->version=version=0; 548 myConverterData->version=version=0;
545 } 549 }
546 if(pArgs->onlyTestIsLoadable) { 550 if(pArgs->onlyTestIsLoadable) {
547 ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carri es result */ 551 ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carri es result */
548 uprv_free(cnv->extraInfo); 552 uprv_free(cnv->extraInfo);
(...skipping 19 matching lines...) Expand all
568 setInitialStateFromUnicodeKR(cnv, myConverterData); 572 setInitialStateFromUnicodeKR(cnv, myConverterData);
569 573
570 /* set the function pointers to appropriate funtions */ 574 /* set the function pointers to appropriate funtions */
571 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; 575 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
572 uprv_strcpy(myConverterData->locale,"ko"); 576 uprv_strcpy(myConverterData->locale,"ko");
573 } 577 }
574 } 578 }
575 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m yLocale[1]=='n'))&& 579 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m yLocale[1]=='n'))&&
576 (myLocale[2]=='_' || myLocale[2]=='\0')) 580 (myLocale[2]=='_' || myLocale[2]=='\0'))
577 { 581 {
582 if(version>2) {
583 // ICU 55 fails to open a converter for an unsupported version.
584 // Previously, it fell back to version 0, but that would yield
585 // unexpected behavior.
586 *errorCode = U_MISSING_RESOURCE_ERROR;
587 return;
588 }
578 589
579 /* open the required converters and cache them */ 590 /* open the required converters and cache them */
580 myConverterData->myConverterArray[GB2312_1] = 591 myConverterData->myConverterArray[GB2312_1] =
581 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorC ode); 592 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorC ode);
582 if(version==1) { 593 if(version==1) {
583 myConverterData->myConverterArray[ISO_IR_165] = 594 myConverterData->myConverterArray[ISO_IR_165] =
584 ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode); 595 ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
585 } 596 }
586 myConverterData->myConverterArray[CNS_11643] = 597 myConverterData->myConverterArray[CNS_11643] =
587 ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode); 598 ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
588 599
589 600
590 /* set the function pointers to appropriate funtions */ 601 /* set the function pointers to appropriate funtions */
591 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; 602 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
592 uprv_strcpy(myConverterData->locale,"cn"); 603 uprv_strcpy(myConverterData->locale,"cn");
593 604
594 if (version==0){ 605 if (version==0){
595 myConverterData->version = 0; 606 myConverterData->version = 0;
596 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0"); 607 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0");
597 }else if (version==1){ 608 }else if (version==1){
598 myConverterData->version = 1; 609 myConverterData->version = 1;
599 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1"); 610 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1");
600 }else { 611 }else {
601 myConverterData->version = 2; 612 myConverterData->version = 2;
602 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2"); 613 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2");
603 } 614 }
604 } 615 }
605 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION 616 #endif // !UCONFIG_ONLY_HTML_CONVERSION
606 else{ 617 else{
607 #ifdef U_ENABLE_GENERIC_ISO_2022 618 #ifdef U_ENABLE_GENERIC_ISO_2022
608 myConverterData->isFirstBuffer = TRUE; 619 myConverterData->isFirstBuffer = TRUE;
609 620
610 /* append the UTF-8 escape sequence */ 621 /* append the UTF-8 escape sequence */
611 cnv->charErrorBufferLength = 3; 622 cnv->charErrorBufferLength = 3;
612 cnv->charErrorBuffer[0] = 0x1b; 623 cnv->charErrorBuffer[0] = 0x1b;
613 cnv->charErrorBuffer[1] = 0x25; 624 cnv->charErrorBuffer[1] = 0x25;
614 cnv->charErrorBuffer[2] = 0x42; 625 cnv->charErrorBuffer[2] = 0x42;
615 626
616 cnv->sharedData=(UConverterSharedData*)&_ISO2022Data; 627 cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
617 /* initialize the state variables */ 628 /* initialize the state variables */
618 uprv_strcpy(myConverterData->name,"ISO_2022"); 629 uprv_strcpy(myConverterData->name,"ISO_2022");
619 #else 630 #else
620 *errorCode = U_UNSUPPORTED_ERROR; 631 *errorCode = U_MISSING_RESOURCE_ERROR;
632 // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
633 // data loading error code.
621 return; 634 return;
622 #endif 635 #endif
623 } 636 }
624 637
625 cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar; 638 cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
626 639
627 if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { 640 if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
628 _ISO2022Close(cnv); 641 _ISO2022Close(cnv);
629 } 642 }
630 } else { 643 } else {
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
730 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 743 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
731 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE 744 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE
732 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 745 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
733 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE 746 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE
734 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
735 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 749 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
738 }; 751 };
739 752
740 #if !UCONFIG_NO_NON_HTML5_CONVERSION 753 #if !UCONFIG_ONLY_HTML_CONVERSION
741 /*************** to unicode *******************/ 754 /*************** to unicode *******************/
742 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 755 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
743 /* 0 1 2 3 4 5 6 7 8 9 */ 756 /* 0 1 2 3 4 5 6 7 8 9 */
744 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 757 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
745 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 758 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
746 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 759 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 760 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 761 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
749 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 762 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 763 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
896 if(myData2022->toU2022State.g<2) { 909 if(myData2022->toU2022State.g<2) {
897 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g; 910 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;
898 } 911 }
899 myData2022->toU2022State.g=2; 912 myData2022->toU2022State.g=2;
900 } else { 913 } else {
901 /* illegal to have SS2 before a matching designator */ 914 /* illegal to have SS2 before a matching designator */
902 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 915 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
903 } 916 }
904 break; 917 break;
905 /* case SS3_STATE: not used in ISO-2022-JP-x */ 918 /* case SS3_STATE: not used in ISO-2022-JP-x */
906 #if !UCONFIG_NO_NON_HTML5_CONVERSION
907 case ISO8859_1: 919 case ISO8859_1:
908 case ISO8859_7: 920 case ISO8859_7:
909 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 921 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
910 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 922 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
911 } else { 923 } else {
912 /* G2 charset for SS2 */ 924 /* G2 charset for SS2 */
913 myData2022->toU2022State.cs[2]=(int8_t)tempState; 925 myData2022->toU2022State.cs[2]=(int8_t)tempState;
914 } 926 }
915 break; 927 break;
916 #endif
917 default: 928 default:
918 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 929 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
919 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 930 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
920 } else { 931 } else {
921 /* G0 charset */ 932 /* G0 charset */
922 myData2022->toU2022State.cs[0]=(int8_t)tempState; 933 myData2022->toU2022State.cs[0]=(int8_t)tempState;
923 } 934 }
924 break; 935 break;
925 } 936 }
926 } 937 }
927 break; 938 break;
928 #if !UCONFIG_NO_NON_HTML5_CONVERSION 939 #if !UCONFIG_ONLY_HTML_CONVERSION
929 case ISO_2022_CN: 940 case ISO_2022_CN:
930 { 941 {
931 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 942 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
932 switch(tempState) { 943 switch(tempState) {
933 case INVALID_STATE: 944 case INVALID_STATE:
934 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 945 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
935 break; 946 break;
936 case SS2_STATE: 947 case SS2_STATE:
937 if(myData2022->toU2022State.cs[2]!=0) { 948 if(myData2022->toU2022State.cs[2]!=0) {
938 if(myData2022->toU2022State.g<2) { 949 if(myData2022->toU2022State.g<2) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
980 } 991 }
981 } 992 }
982 break; 993 break;
983 case ISO_2022_KR: 994 case ISO_2022_KR:
984 if(offset==0x30){ 995 if(offset==0x30){
985 /* nothing to be done, just accept this one escape sequence */ 996 /* nothing to be done, just accept this one escape sequence */
986 } else { 997 } else {
987 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 998 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
988 } 999 }
989 break; 1000 break;
990 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 1001 #endif // !UCONFIG_ONLY_HTML_CONVERSION
991 1002
992 default: 1003 default:
993 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 1004 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
994 break; 1005 break;
995 } 1006 }
996 } 1007 }
997 if(U_SUCCESS(*err)) { 1008 if(U_SUCCESS(*err)) {
998 _this->toULength = 0; 1009 _this->toULength = 0;
999 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { 1010 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
1000 if(_this->toULength>1) { 1011 if(_this->toULength>1) {
(...skipping 22 matching lines...) Expand all
1023 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng th); 1034 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng th);
1024 *source-=bytesFromThisBuffer; 1035 *source-=bytesFromThisBuffer;
1025 } 1036 }
1026 _this->toULength=1; 1037 _this->toULength=1;
1027 } 1038 }
1028 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { 1039 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
1029 _this->toUCallbackReason = UCNV_UNASSIGNED; 1040 _this->toUCallbackReason = UCNV_UNASSIGNED;
1030 } 1041 }
1031 } 1042 }
1032 1043
1044 #if !UCONFIG_ONLY_HTML_CONVERSION
1033 /*Checks the characters of the buffer against valid 2022 escape sequences 1045 /*Checks the characters of the buffer against valid 2022 escape sequences
1034 *if the match we return a pointer to the initial start of the sequence otherwise 1046 *if the match we return a pointer to the initial start of the sequence otherwise
1035 *we return sourceLimit 1047 *we return sourceLimit
1036 */ 1048 */
1037 /*for 2022 looks ahead in the stream 1049 /*for 2022 looks ahead in the stream
1038 *to determine the longest possible convertible 1050 *to determine the longest possible convertible
1039 *data stream 1051 *data stream
1040 */ 1052 */
1041 static inline const char* 1053 static inline const char*
1042 getEndOfBuffer_2022(const char** source, 1054 getEndOfBuffer_2022(const char** source,
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
1077 }while (++mySource < sourceLimit); 1089 }while (++mySource < sourceLimit);
1078 1090
1079 return sourceLimit; 1091 return sourceLimit;
1080 #else 1092 #else
1081 while(mySource < sourceLimit && *mySource != ESC_2022) { 1093 while(mySource < sourceLimit && *mySource != ESC_2022) {
1082 ++mySource; 1094 ++mySource;
1083 } 1095 }
1084 return mySource; 1096 return mySource;
1085 #endif 1097 #endif
1086 } 1098 }
1087 1099 #endif
1088 1100
1089 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c 1101 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
1090 * any future change in _MBCSFromUChar32() function should be reflected here. 1102 * any future change in _MBCSFromUChar32() function should be reflected here.
1091 * @return number of bytes in *value; negative number if fallback; 0 if no mapping 1103 * @return number of bytes in *value; negative number if fallback; 0 if no mapping
1092 */ 1104 */
1093 static inline int32_t 1105 static inline int32_t
1094 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, 1106 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
1095 UChar32 c, 1107 UChar32 c,
1096 uint32_t* value, 1108 uint32_t* value,
1097 UBool useFallback, 1109 UBool useFallback,
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after
1401 * KSC5601 : alias to ibm-949 mapping table 1413 * KSC5601 : alias to ibm-949 mapping table
1402 * GB2312 : alias to ibm-1386 mapping table 1414 * GB2312 : alias to ibm-1386 mapping table
1403 * ISO-8859-1 : Algorithmic implemented as LATIN1 case 1415 * ISO-8859-1 : Algorithmic implemented as LATIN1 case
1404 * ISO-8859-7 : alisas to ibm-9409 mapping table 1416 * ISO-8859-7 : alisas to ibm-9409 mapping table
1405 */ 1417 */
1406 1418
1407 /* preference order of JP charsets */ 1419 /* preference order of JP charsets */
1408 static const StateEnum jpCharsetPref[]={ 1420 static const StateEnum jpCharsetPref[]={
1409 ASCII, 1421 ASCII,
1410 JISX201, 1422 JISX201,
1411 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1412 ISO8859_1, 1423 ISO8859_1,
1424 JISX208,
1413 ISO8859_7, 1425 ISO8859_7,
1414 #endif
1415 JISX208,
1416 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1417 JISX212, 1426 JISX212,
1418 GB2312, 1427 GB2312,
1419 KSC5601, 1428 KSC5601,
1420 #endif
1421 HWKANA_7BIT 1429 HWKANA_7BIT
1422 }; 1430 };
1423 1431
1424 /* 1432 /*
1425 * The escape sequences must be in order of the enum constants like JISX201 = 3, 1433 * The escape sequences must be in order of the enum constants like JISX201 = 3,
1426 * not in order of jpCharsetPref[]! 1434 * not in order of jpCharsetPref[]!
1427 */ 1435 */
1428 static const char escSeqChars[][6] ={ 1436 static const char escSeqChars[][6] ={
1429 "\x1B\x28\x42", /* <ESC>(B ASCII */ 1437 "\x1B\x28\x42", /* <ESC>(B ASCII */
1430 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ 1438 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after
1780 int8_t cs0 = choices[i]; 1788 int8_t cs0 = choices[i];
1781 switch(cs0) { 1789 switch(cs0) {
1782 case ASCII: 1790 case ASCII:
1783 if(sourceChar <= 0x7f) { 1791 if(sourceChar <= 0x7f) {
1784 targetValue = (uint32_t)sourceChar; 1792 targetValue = (uint32_t)sourceChar;
1785 len = 1; 1793 len = 1;
1786 cs = cs0; 1794 cs = cs0;
1787 g = 0; 1795 g = 0;
1788 } 1796 }
1789 break; 1797 break;
1790 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1791 case ISO8859_1: 1798 case ISO8859_1:
1792 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 1799 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1793 targetValue = (uint32_t)sourceChar - 0x80; 1800 targetValue = (uint32_t)sourceChar - 0x80;
1794 len = 1; 1801 len = 1;
1795 cs = cs0; 1802 cs = cs0;
1796 g = 2; 1803 g = 2;
1797 } 1804 }
1798 break; 1805 break;
1799 #endif
1800 case HWKANA_7BIT: 1806 case HWKANA_7BIT:
1801 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) { 1807 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) {
1802 if(converterData->version==3) { 1808 if(converterData->version==3) {
1803 /* JIS7: use G1 (SO) */ 1809 /* JIS7: use G1 (SO) */
1804 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ 1810 /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1805 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); 1811 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
1806 len = 1; 1812 len = 1;
1807 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ 1813 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
1808 g = 1; 1814 g = 1;
1809 } else if(converterData->version==4) { 1815 } else if(converterData->version==4) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1851 } 1857 }
1852 } else if(len == 0 && useFallback && 1858 } else if(len == 0 && useFallback &&
1853 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) { 1859 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) {
1854 targetValue = hwkana_fb[sourceChar - HWKANA_START]; 1860 targetValue = hwkana_fb[sourceChar - HWKANA_START];
1855 len = -2; 1861 len = -2;
1856 cs = cs0; 1862 cs = cs0;
1857 g = 0; 1863 g = 0;
1858 useFallback = FALSE; 1864 useFallback = FALSE;
1859 } 1865 }
1860 break; 1866 break;
1861 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1862 case ISO8859_7: 1867 case ISO8859_7:
1863 /* G0 SBCS forced to 7-bit output */ 1868 /* G0 SBCS forced to 7-bit output */
1864 len2 = MBCS_SINGLE_FROM_UCHAR32( 1869 len2 = MBCS_SINGLE_FROM_UCHAR32(
1865 converterData->myConverterArray[cs0], 1870 converterData->myConverterArray[cs0],
1866 sourceChar, &value, 1871 sourceChar, &value,
1867 useFallback); 1872 useFallback);
1868 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) { 1873 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) {
1869 targetValue = value - 0x80; 1874 targetValue = value - 0x80;
1870 len = len2; 1875 len = len2;
1871 cs = cs0; 1876 cs = cs0;
1872 g = 2; 1877 g = 2;
1873 useFallback = FALSE; 1878 useFallback = FALSE;
1874 } 1879 }
1875 break; 1880 break;
1876 #endif
1877 default: 1881 default:
1878 /* G0 DBCS */ 1882 /* G0 DBCS */
1879 len2 = MBCS_FROM_UCHAR32_ISO2022( 1883 len2 = MBCS_FROM_UCHAR32_ISO2022(
1880 converterData->myConverterArray[cs0], 1884 converterData->myConverterArray[cs0],
1881 sourceChar, &value, 1885 sourceChar, &value,
1882 useFallback, MBCS_OUTPUT_2); 1886 useFallback, MBCS_OUTPUT_2);
1883 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1887 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1884 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1885 if(cs0 == KSC5601) { 1888 if(cs0 == KSC5601) {
1886 /* 1889 /*
1887 * Check for valid bytes for the encoding scheme. 1890 * Check for valid bytes for the encoding scheme.
1888 * This is necessary because the sub-converter (windows-949) 1891 * This is necessary because the sub-converter (windows-949)
1889 * has a broader encoding scheme than is valid for 2022. 1892 * has a broader encoding scheme than is valid for 2022.
1890 */ 1893 */
1891 value = _2022FromGR94DBCS(value); 1894 value = _2022FromGR94DBCS(value);
1892 if(value == 0) { 1895 if(value == 0) {
1893 break; 1896 break;
1894 } 1897 }
1895 } 1898 }
1896 #endif
1897 targetValue = value; 1899 targetValue = value;
1898 len = len2; 1900 len = len2;
1899 cs = cs0; 1901 cs = cs0;
1900 g = 0; 1902 g = 0;
1901 useFallback = FALSE; 1903 useFallback = FALSE;
1902 } 1904 }
1903 break; 1905 break;
1904 } 1906 }
1905 } 1907 }
1906 1908
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
2180 /* return from a single-shift state to the previous one */ 2182 /* return from a single-shift state to the previous one */
2181 if(pToU2022State->g >= 2) { 2183 if(pToU2022State->g >= 2) {
2182 pToU2022State->g=pToU2022State->prevG; 2184 pToU2022State->g=pToU2022State->prevG;
2183 } 2185 }
2184 } else switch(cs) { 2186 } else switch(cs) {
2185 case ASCII: 2187 case ASCII:
2186 if(mySourceChar <= 0x7f) { 2188 if(mySourceChar <= 0x7f) {
2187 targetUniChar = mySourceChar; 2189 targetUniChar = mySourceChar;
2188 } 2190 }
2189 break; 2191 break;
2190 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2191 case ISO8859_1: 2192 case ISO8859_1:
2192 if(mySourceChar <= 0x7f) { 2193 if(mySourceChar <= 0x7f) {
2193 targetUniChar = mySourceChar + 0x80; 2194 targetUniChar = mySourceChar + 0x80;
2194 } 2195 }
2195 /* return from a single-shift state to the previous one */ 2196 /* return from a single-shift state to the previous one */
2196 pToU2022State->g=pToU2022State->prevG; 2197 pToU2022State->g=pToU2022State->prevG;
2197 break; 2198 break;
2198 case ISO8859_7: 2199 case ISO8859_7:
2199 if(mySourceChar <= 0x7f) { 2200 if(mySourceChar <= 0x7f) {
2200 /* convert mySourceChar+0x80 to use a normal 8-bit table */ 2201 /* convert mySourceChar+0x80 to use a normal 8-bit table */
2201 targetUniChar = 2202 targetUniChar =
2202 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( 2203 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
2203 myData->myConverterArray[cs], 2204 myData->myConverterArray[cs],
2204 mySourceChar + 0x80); 2205 mySourceChar + 0x80);
2205 } 2206 }
2206 /* return from a single-shift state to the previous one */ 2207 /* return from a single-shift state to the previous one */
2207 pToU2022State->g=pToU2022State->prevG; 2208 pToU2022State->g=pToU2022State->prevG;
2208 break; 2209 break;
2209 #endif
2210 case JISX201: 2210 case JISX201:
2211 if(mySourceChar <= 0x7f) { 2211 if(mySourceChar <= 0x7f) {
2212 targetUniChar = jisx201ToU(mySourceChar); 2212 targetUniChar = jisx201ToU(mySourceChar);
2213 } 2213 }
2214 break; 2214 break;
2215 case HWKANA_7BIT: 2215 case HWKANA_7BIT:
2216 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { 2216 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
2217 /* 7-bit halfwidth Katakana */ 2217 /* 7-bit halfwidth Katakana */
2218 targetUniChar = mySourceChar + (HWKANA_START - 0x21); 2218 targetUniChar = mySourceChar + (HWKANA_START - 0x21);
2219 } 2219 }
(...skipping 19 matching lines...) Expand all
2239 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ; 2239 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ;
2240 if (leadIsOk && trailIsOk) { 2240 if (leadIsOk && trailIsOk) {
2241 ++mySource; 2241 ++mySource;
2242 tmpSourceChar = (mySourceChar << 8) | trailByte; 2242 tmpSourceChar = (mySourceChar << 8) | trailByte;
2243 if(cs == JISX208) { 2243 if(cs == JISX208) {
2244 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf); 2244 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf);
2245 mySourceChar = tmpSourceChar; 2245 mySourceChar = tmpSourceChar;
2246 } else { 2246 } else {
2247 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ 2247 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
2248 mySourceChar = tmpSourceChar; 2248 mySourceChar = tmpSourceChar;
2249 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2250 if (cs == KSC5601) { 2249 if (cs == KSC5601) {
2251 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */ 2250 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */
2252 } 2251 }
2253 #endif
2254 tempBuf[0] = (char)(tmpSourceChar >> 8); 2252 tempBuf[0] = (char)(tmpSourceChar >> 8);
2255 tempBuf[1] = (char)(tmpSourceChar); 2253 tempBuf[1] = (char)(tmpSourceChar);
2256 } 2254 }
2257 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE); 2255 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE);
2258 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 2256 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
2259 /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 2257 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
2260 ++mySource; 2258 ++mySource;
2261 /* add another bit so that the code below writes 2 bytes in case of error */ 2259 /* add another bit so that the code below writes 2 bytes in case of error */
2262 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte; 2260 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte;
2263 } 2261 }
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2305 *err =U_BUFFER_OVERFLOW_ERROR; 2303 *err =U_BUFFER_OVERFLOW_ERROR;
2306 break; 2304 break;
2307 } 2305 }
2308 } 2306 }
2309 endloop: 2307 endloop:
2310 args->target = myTarget; 2308 args->target = myTarget;
2311 args->source = mySource; 2309 args->source = mySource;
2312 } 2310 }
2313 2311
2314 2312
2315 #if !UCONFIG_NO_NON_HTML5_CONVERSION 2313 #if !UCONFIG_ONLY_HTML_CONVERSION
2316 /*************************************************************** 2314 /***************************************************************
2317 * Rules for ISO-2022-KR encoding 2315 * Rules for ISO-2022-KR encoding
2318 * i) The KSC5601 designator sequence should appear only once in a file, 2316 * i) The KSC5601 designator sequence should appear only once in a file,
2319 * at the beginning of a line before any KSC5601 characters. This usually 2317 * at the beginning of a line before any KSC5601 characters. This usually
2320 * means that it appears by itself on the first line of the file 2318 * means that it appears by itself on the first line of the file
2321 * ii) There are only 2 shifting sequences SO to shift into double byte mode 2319 * ii) There are only 2 shifting sequences SO to shift into double byte mode
2322 * and SI to shift into single byte mode 2320 * and SI to shift into single byte mode
2323 */ 2321 */
2324 static void 2322 static void
2325 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2323 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after
3449 } 3447 }
3450 else{ 3448 else{
3451 *err =U_BUFFER_OVERFLOW_ERROR; 3449 *err =U_BUFFER_OVERFLOW_ERROR;
3452 break; 3450 break;
3453 } 3451 }
3454 } 3452 }
3455 endloop: 3453 endloop:
3456 args->target = myTarget; 3454 args->target = myTarget;
3457 args->source = mySource; 3455 args->source = mySource;
3458 } 3456 }
3459 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 3457 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3460 3458
3461 static void 3459 static void
3462 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) { 3460 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) {
3463 UConverter *cnv = args->converter; 3461 UConverter *cnv = args->converter;
3464 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo; 3462 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo;
3465 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; 3463 ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3466 char *p, *subchar; 3464 char *p, *subchar;
3467 char buffer[8]; 3465 char buffer[8];
3468 int32_t length; 3466 int32_t length;
3469 3467
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
3651 #endif 3649 #endif
3652 3650
3653 cnvData = (UConverterDataISO2022*)cnv->extraInfo; 3651 cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3654 3652
3655 /* open a set and initialize it with code points that are algorithmically round-tripped */ 3653 /* open a set and initialize it with code points that are algorithmically round-tripped */
3656 switch(cnvData->locale[0]){ 3654 switch(cnvData->locale[0]){
3657 case 'j': 3655 case 'j':
3658 /* include JIS X 0201 which is hardcoded */ 3656 /* include JIS X 0201 which is hardcoded */
3659 sa->add(sa->set, 0xa5); 3657 sa->add(sa->set, 0xa5);
3660 sa->add(sa->set, 0x203e); 3658 sa->add(sa->set, 0x203e);
3661 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3662 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3659 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3663 /* include Latin-1 for some variants of JP */ 3660 /* include Latin-1 for some variants of JP */
3664 sa->addRange(sa->set, 0, 0xff); 3661 sa->addRange(sa->set, 0, 0xff);
3665 } else { 3662 } else {
3666 /* include ASCII for JP */ 3663 /* include ASCII for JP */
3667 sa->addRange(sa->set, 0, 0x7f); 3664 sa->addRange(sa->set, 0, 0x7f);
3668 } 3665 }
3669 #else
3670 /* include ASCII for JP */
3671 sa->addRange(sa->set, 0, 0x7f);
3672 #endif
3673 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) { 3666 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) {
3674 /* 3667 /*
3675 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 3668 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
3676 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) 3669 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3677 * use half-width Katakana. 3670 * use half-width Katakana.
3678 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) 3671 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3679 * half-width Katakana via the ESC ( I sequence. 3672 * half-width Katakana via the ESC ( I sequence.
3680 * However, we only emit (fromUnicode) half-width Katakana according to the 3673 * However, we only emit (fromUnicode) half-width Katakana according to the
3681 * definition of each variant. 3674 * definition of each variant.
3682 * 3675 *
3683 * When including fallbacks, 3676 * When including fallbacks,
3684 * we need to include half-width Katakana Unicode code points for all JP variants because 3677 * we need to include half-width Katakana Unicode code points for all JP variants because
3685 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 3678 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3686 */ 3679 */
3687 /* include half-width Katakana for JP */ 3680 /* include half-width Katakana for JP */
3688 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3681 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3689 } 3682 }
3690 break; 3683 break;
3691 #if !UCONFIG_NO_NON_HTML5_CONVERSION 3684 #if !UCONFIG_ONLY_HTML_CONVERSION
3692 case 'c': 3685 case 'c':
3693 case 'z': 3686 case 'z':
3694 /* include ASCII for CN */ 3687 /* include ASCII for CN */
3695 sa->addRange(sa->set, 0, 0x7f); 3688 sa->addRange(sa->set, 0, 0x7f);
3696 break; 3689 break;
3697 case 'k': 3690 case 'k':
3698 /* there is only one converter for KR, and it is not in the myConverterArray[] */ 3691 /* there is only one converter for KR, and it is not in the myConverterArray[] */
3699 cnvData->currentConverter->sharedData->impl->getUnicodeSet( 3692 cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3700 cnvData->currentConverter, sa, which, pErrorCode); 3693 cnvData->currentConverter, sa, which, pErrorCode);
3701 /* the loop over myConverterArray[] will simply not find another converter */ 3694 /* the loop over myConverterArray[] will simply not find another converter */
(...skipping 18 matching lines...) Expand all
3720 3713
3721 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 3714 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
3722 UConverterSetFilter filter; 3715 UConverterSetFilter filter;
3723 if(cnvData->myConverterArray[i]!=NULL) { 3716 if(cnvData->myConverterArray[i]!=NULL) {
3724 if(cnvData->locale[0]=='j' && i==JISX208) { 3717 if(cnvData->locale[0]=='j' && i==JISX208) {
3725 /* 3718 /*
3726 * Only add code points that map to Shift-JIS codes 3719 * Only add code points that map to Shift-JIS codes
3727 * corresponding to JIS X 0208. 3720 * corresponding to JIS X 0208.
3728 */ 3721 */
3729 filter=UCNV_SET_FILTER_SJIS; 3722 filter=UCNV_SET_FILTER_SJIS;
3730 #if !UCONFIG_NO_NON_HTML5_CONVERSION 3723 #if !UCONFIG_ONLY_HTML_CONVERSION
3731 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3724 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3732 cnvData->version==0 && i==CNS_11643) { 3725 cnvData->version==0 && i==CNS_11643) {
3733 /* 3726 /*
3734 * Version-specific for CN: 3727 * Version-specific for CN:
3735 * CN version 0 does not map CNS planes 3..7 although 3728 * CN version 0 does not map CNS planes 3..7 although
3736 * they are all available in the CNS conversion table; 3729 * they are all available in the CNS conversion table;
3737 * CN version 1 (-EXT) does map them all. 3730 * CN version 1 (-EXT) does map them all.
3738 * The two versions create different Unicode sets. 3731 * The two versions create different Unicode sets.
3739 */ 3732 */
3740 filter=UCNV_SET_FILTER_2022_CN; 3733 filter=UCNV_SET_FILTER_2022_CN;
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
3859 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3852 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3860 }; 3853 };
3861 3854
3862 namespace { 3855 namespace {
3863 3856
3864 const UConverterSharedData _ISO2022JPData= 3857 const UConverterSharedData _ISO2022JPData=
3865 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022J PImpl); 3858 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022J PImpl);
3866 3859
3867 } // namespace 3860 } // namespace
3868 3861
3869 #if !UCONFIG_NO_NON_HTML5_CONVERSION 3862 #if !UCONFIG_ONLY_HTML_CONVERSION
3870 /************* KR ***************/ 3863 /************* KR ***************/
3871 static const UConverterImpl _ISO2022KRImpl={ 3864 static const UConverterImpl _ISO2022KRImpl={
3872 UCNV_ISO_2022, 3865 UCNV_ISO_2022,
3873 3866
3874 NULL, 3867 NULL,
3875 NULL, 3868 NULL,
3876 3869
3877 _ISO2022Open, 3870 _ISO2022Open,
3878 _ISO2022Close, 3871 _ISO2022Close,
3879 _ISO2022Reset, 3872 _ISO2022Reset,
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
3960 0, 3953 0,
3961 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3954 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3962 }; 3955 };
3963 3956
3964 namespace { 3957 namespace {
3965 3958
3966 const UConverterSharedData _ISO2022CNData= 3959 const UConverterSharedData _ISO2022CNData=
3967 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022C NImpl); 3960 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022C NImpl);
3968 3961
3969 } // namespace 3962 } // namespace
3970 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 3963 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3971 3964
3972 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 3965 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
OLDNEW
« no previous file with comments | « source/common/ucmndata.h ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698