Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(545)

Side by Side Diff: source/common/ucnv2022.cpp

Issue 1222643002: Update uconv.patch to exactly match upstream SVN r37045. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « patches/uconv.patch ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2000-2014, International Business Machines 3 * Copyright (C) 2000-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 * file name: ucnv2022.cpp 6 * file name: ucnv2022.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
11 * created on: 2000feb03 11 * created on: 2000feb03
12 * created by: Markus W. Scherer 12 * created by: Markus W. Scherer
13 * 13 *
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
68 * This means, for example, that when ISO-8859-7 is designated, the following 68 * This means, for example, that when ISO-8859-7 is designated, the following
69 * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. 69 * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
70 * The ICU ISO-2022 converter does not handle this - and has no information 70 * The ICU ISO-2022 converter does not handle this - and has no information
71 * about which subconverter would have to be shifted vs. which is designed 71 * about which subconverter would have to be shifted vs. which is designed
72 * for 7-bit ISO-2022. 72 * for 7-bit ISO-2022.
73 * 73 *
74 * Markus Scherer 2003-dec-03 74 * Markus Scherer 2003-dec-03
75 */ 75 */
76 #endif 76 #endif
77 77
78 #if !UCONFIG_ONLY_HTML_CONVERSION
78 static const char SHIFT_IN_STR[] = "\x0F"; 79 static const char SHIFT_IN_STR[] = "\x0F";
79 // static const char SHIFT_OUT_STR[] = "\x0E"; 80 // static const char SHIFT_OUT_STR[] = "\x0E";
81 #endif
80 82
81 #define CR 0x0D 83 #define CR 0x0D
82 #define LF 0x0A 84 #define LF 0x0A
83 #define H_TAB 0x09 85 #define H_TAB 0x09
84 #define V_TAB 0x0B 86 #define V_TAB 0x0B
85 #define SPACE 0x20 87 #define SPACE 0x20
86 88
87 enum { 89 enum {
88 HWKANA_START=0xff61, 90 HWKANA_START=0xff61,
89 HWKANA_END=0xff9f 91 HWKANA_END=0xff9f
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 CNS_11643_1, 147 CNS_11643_1,
146 CNS_11643_2, 148 CNS_11643_2,
147 CNS_11643_3, 149 CNS_11643_3,
148 CNS_11643_4, 150 CNS_11643_4,
149 CNS_11643_5, 151 CNS_11643_5,
150 CNS_11643_6, 152 CNS_11643_6,
151 CNS_11643_7 153 CNS_11643_7
152 } StateEnum; 154 } StateEnum;
153 155
154 /* is the StateEnum charset value for a DBCS charset? */ 156 /* is the StateEnum charset value for a DBCS charset? */
155 #if UCONFIG_NO_NON_HTML5_CONVERSION 157 #if UCONFIG_ONLY_HTML_CONVERSION
156 #define IS_JP_DBCS(cs) (JISX208==(cs)) 158 #define IS_JP_DBCS(cs) (JISX208==(cs))
157 #else 159 #else
158 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 160 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
159 #endif 161 #endif
160 162
161 #define CSM(cs) ((uint16_t)1<<(cs)) 163 #define CSM(cs) ((uint16_t)1<<(cs))
162 164
163 /* 165 /*
164 * Each of these charset masks (with index x) contains a bit for a charset in ex act correspondence 166 * Each of these charset masks (with index x) contains a bit for a charset in ex act correspondence
165 * to whether that charset is used in the corresponding version x of ISO_2022,lo cale=ja,version=x 167 * to whether that charset is used in the corresponding version x of ISO_2022,lo cale=ja,version=x
166 * 168 *
167 * Note: The converter uses some leniency: 169 * Note: The converter uses some leniency:
168 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in 170 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
169 * all versions, not just JIS7 and JIS8. 171 * all versions, not just JIS7 and JIS8.
170 * - ICU does not distinguish between different versions of JIS X 0208. 172 * - ICU does not distinguish between different versions of JIS X 0208.
171 */ 173 */
172 #if UCONFIG_NO_NON_HTML5_CONVERSION 174 #if UCONFIG_ONLY_HTML_CONVERSION
173 enum { MAX_JA_VERSION=0 }; 175 enum { MAX_JA_VERSION=0 };
174 #else 176 #else
175 enum { MAX_JA_VERSION=4 }; 177 enum { MAX_JA_VERSION=4 };
176 #endif 178 #endif
177 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 179 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
178 /*
179 * TODO(jshin): The encoding spec has JISX212, but we don't support it.
180 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
181 */
182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 180 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
183 #if !UCONFIG_NO_NON_HTML5_CONVERSION 181 #if !UCONFIG_ONLY_HTML_CONVERSION
184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 182 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 183 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 185 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
188 #endif 186 #endif
189 }; 187 };
190 188
191 typedef enum { 189 typedef enum {
192 ASCII1=0, 190 ASCII1=0,
193 LATIN1, 191 LATIN1,
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 368 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 369 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
373 }; 371 };
374 372
375 /* Type def for refactoring changeState_2022 code*/ 373 /* Type def for refactoring changeState_2022 code*/
376 typedef enum{ 374 typedef enum{
377 #ifdef U_ENABLE_GENERIC_ISO_2022 375 #ifdef U_ENABLE_GENERIC_ISO_2022
378 ISO_2022=0, 376 ISO_2022=0,
379 #endif 377 #endif
380 #if UCONFIG_NO_NON_HTML5_CONVERSION
381 ISO_2022_JP=1
382 #else
383 ISO_2022_JP=1, 378 ISO_2022_JP=1,
379 #if !UCONFIG_ONLY_HTML_CONVERSION
384 ISO_2022_KR=2, 380 ISO_2022_KR=2,
385 ISO_2022_CN=3 381 ISO_2022_CN=3
386 #endif 382 #endif
387 } Variant2022; 383 } Variant2022;
388 384
389 /*********** ISO 2022 Converter Protos ***********/ 385 /*********** ISO 2022 Converter Protos ***********/
390 static void 386 static void
391 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 387 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
392 388
393 static void 389 static void
(...skipping 13 matching lines...) Expand all
407 403
408 #ifdef U_ENABLE_GENERIC_ISO_2022 404 #ifdef U_ENABLE_GENERIC_ISO_2022
409 static void 405 static void
410 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UEr rorCode* err); 406 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UEr rorCode* err);
411 #endif 407 #endif
412 408
413 namespace { 409 namespace {
414 410
415 /*const UConverterSharedData _ISO2022Data;*/ 411 /*const UConverterSharedData _ISO2022Data;*/
416 extern const UConverterSharedData _ISO2022JPData; 412 extern const UConverterSharedData _ISO2022JPData;
413
414 #if !UCONFIG_ONLY_HTML_CONVERSION
417 extern const UConverterSharedData _ISO2022KRData; 415 extern const UConverterSharedData _ISO2022KRData;
418 extern const UConverterSharedData _ISO2022CNData; 416 extern const UConverterSharedData _ISO2022CNData;
417 #endif
419 418
420 } // namespace 419 } // namespace
421 420
422 /*************** Converter implementations ******************/ 421 /*************** Converter implementations ******************/
423 422
424 /* The purpose of this function is to get around gcc compiler warnings. */ 423 /* The purpose of this function is to get around gcc compiler warnings. */
425 static inline void 424 static inline void
426 fromUWriteUInt8(UConverter *cnv, 425 fromUWriteUInt8(UConverter *cnv,
427 const char *bytes, int32_t length, 426 const char *bytes, int32_t length,
428 uint8_t **target, const char *targetLimit, 427 uint8_t **target, const char *targetLimit,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
493 myConverterData->version = version; 492 myConverterData->version = version;
494 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && 493 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
495 (myLocale[2]=='_' || myLocale[2]=='\0')) 494 (myLocale[2]=='_' || myLocale[2]=='\0'))
496 { 495 {
497 size_t len=0; 496 size_t len=0;
498 /* open the required converters and cache them */ 497 /* open the required converters and cache them */
499 if(version>MAX_JA_VERSION) { 498 if(version>MAX_JA_VERSION) {
500 /* prevent indexing beyond jpCharsetMasks[] */ 499 /* prevent indexing beyond jpCharsetMasks[] */
501 myConverterData->version = version = 0; 500 myConverterData->version = version = 0;
502 } 501 }
503 #if !UCONFIG_NO_NON_HTML5_CONVERSION
504 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 502 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
505 myConverterData->myConverterArray[ISO8859_7] = 503 myConverterData->myConverterArray[ISO8859_7] =
506 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode); 504 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode);
507 } 505 }
508 #endif
509 myConverterData->myConverterArray[JISX208] = 506 myConverterData->myConverterArray[JISX208] =
510 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code); 507 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code);
511 #if !UCONFIG_NO_NON_HTML5_CONVERSION
512 if(jpCharsetMasks[version]&CSM(JISX212)) { 508 if(jpCharsetMasks[version]&CSM(JISX212)) {
513 myConverterData->myConverterArray[JISX212] = 509 myConverterData->myConverterArray[JISX212] =
514 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode); 510 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode);
515 } 511 }
516 if(jpCharsetMasks[version]&CSM(GB2312)) { 512 if(jpCharsetMasks[version]&CSM(GB2312)) {
517 myConverterData->myConverterArray[GB2312] = 513 myConverterData->myConverterArray[GB2312] =
518 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */ 514 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */
519 } 515 }
520 if(jpCharsetMasks[version]&CSM(KSC5601)) { 516 if(jpCharsetMasks[version]&CSM(KSC5601)) {
521 myConverterData->myConverterArray[KSC5601] = 517 myConverterData->myConverterArray[KSC5601] =
522 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode); 518 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode);
523 } 519 }
524 #endif
525 520
526 /* set the function pointers to appropriate funtions */ 521 /* set the function pointers to appropriate funtions */
527 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 522 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
528 uprv_strcpy(myConverterData->locale,"ja"); 523 uprv_strcpy(myConverterData->locale,"ja");
529 524
530 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= "); 525 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= ");
531 len = uprv_strlen(myConverterData->name); 526 len = uprv_strlen(myConverterData->name);
532 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ; 527 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ;
533 myConverterData->name[len+1]='\0'; 528 myConverterData->name[len+1]='\0';
534 } 529 }
535 #if !UCONFIG_NO_NON_HTML5_CONVERSION 530 #if !UCONFIG_ONLY_HTML_CONVERSION
536 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 531 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
537 (myLocale[2]=='_' || myLocale[2]=='\0')) 532 (myLocale[2]=='_' || myLocale[2]=='\0'))
538 { 533 {
539 const char *cnvName; 534 const char *cnvName;
540 if(version==1) { 535 if(version==1) {
541 cnvName="icu-internal-25546"; 536 cnvName="icu-internal-25546";
542 } else { 537 } else {
543 cnvName="ibm-949"; 538 cnvName="ibm-949";
544 myConverterData->version=version=0; 539 myConverterData->version=version=0;
545 } 540 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
595 myConverterData->version = 0; 590 myConverterData->version = 0;
596 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0"); 591 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0");
597 }else if (version==1){ 592 }else if (version==1){
598 myConverterData->version = 1; 593 myConverterData->version = 1;
599 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1"); 594 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1");
600 }else { 595 }else {
601 myConverterData->version = 2; 596 myConverterData->version = 2;
602 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2"); 597 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2");
603 } 598 }
604 } 599 }
605 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION 600 #endif // !UCONFIG_ONLY_HTML_CONVERSION
606 else{ 601 else{
607 #ifdef U_ENABLE_GENERIC_ISO_2022 602 #ifdef U_ENABLE_GENERIC_ISO_2022
608 myConverterData->isFirstBuffer = TRUE; 603 myConverterData->isFirstBuffer = TRUE;
609 604
610 /* append the UTF-8 escape sequence */ 605 /* append the UTF-8 escape sequence */
611 cnv->charErrorBufferLength = 3; 606 cnv->charErrorBufferLength = 3;
612 cnv->charErrorBuffer[0] = 0x1b; 607 cnv->charErrorBuffer[0] = 0x1b;
613 cnv->charErrorBuffer[1] = 0x25; 608 cnv->charErrorBuffer[1] = 0x25;
614 cnv->charErrorBuffer[2] = 0x42; 609 cnv->charErrorBuffer[2] = 0x42;
615 610
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
730 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 725 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
731 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE 726 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE
732 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 727 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
733 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE 728 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE
734 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 729 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
735 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 730 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 731 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 732 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
738 }; 733 };
739 734
740 #if !UCONFIG_NO_NON_HTML5_CONVERSION 735 #if !UCONFIG_ONLY_HTML_CONVERSION
741 /*************** to unicode *******************/ 736 /*************** to unicode *******************/
742 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 737 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
743 /* 0 1 2 3 4 5 6 7 8 9 */ 738 /* 0 1 2 3 4 5 6 7 8 9 */
744 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 739 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
745 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 740 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
746 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 741 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 742 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 743 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
749 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 744 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 745 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
896 if(myData2022->toU2022State.g<2) { 891 if(myData2022->toU2022State.g<2) {
897 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g; 892 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;
898 } 893 }
899 myData2022->toU2022State.g=2; 894 myData2022->toU2022State.g=2;
900 } else { 895 } else {
901 /* illegal to have SS2 before a matching designator */ 896 /* illegal to have SS2 before a matching designator */
902 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 897 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
903 } 898 }
904 break; 899 break;
905 /* case SS3_STATE: not used in ISO-2022-JP-x */ 900 /* case SS3_STATE: not used in ISO-2022-JP-x */
906 #if !UCONFIG_NO_NON_HTML5_CONVERSION
907 case ISO8859_1: 901 case ISO8859_1:
908 case ISO8859_7: 902 case ISO8859_7:
909 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 903 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
910 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 904 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
911 } else { 905 } else {
912 /* G2 charset for SS2 */ 906 /* G2 charset for SS2 */
913 myData2022->toU2022State.cs[2]=(int8_t)tempState; 907 myData2022->toU2022State.cs[2]=(int8_t)tempState;
914 } 908 }
915 break; 909 break;
916 #endif
917 default: 910 default:
918 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 911 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
919 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 912 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
920 } else { 913 } else {
921 /* G0 charset */ 914 /* G0 charset */
922 myData2022->toU2022State.cs[0]=(int8_t)tempState; 915 myData2022->toU2022State.cs[0]=(int8_t)tempState;
923 } 916 }
924 break; 917 break;
925 } 918 }
926 } 919 }
927 break; 920 break;
928 #if !UCONFIG_NO_NON_HTML5_CONVERSION 921 #if !UCONFIG_ONLY_HTML_CONVERSION
929 case ISO_2022_CN: 922 case ISO_2022_CN:
930 { 923 {
931 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 924 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
932 switch(tempState) { 925 switch(tempState) {
933 case INVALID_STATE: 926 case INVALID_STATE:
934 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 927 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
935 break; 928 break;
936 case SS2_STATE: 929 case SS2_STATE:
937 if(myData2022->toU2022State.cs[2]!=0) { 930 if(myData2022->toU2022State.cs[2]!=0) {
938 if(myData2022->toU2022State.g<2) { 931 if(myData2022->toU2022State.g<2) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
980 } 973 }
981 } 974 }
982 break; 975 break;
983 case ISO_2022_KR: 976 case ISO_2022_KR:
984 if(offset==0x30){ 977 if(offset==0x30){
985 /* nothing to be done, just accept this one escape sequence */ 978 /* nothing to be done, just accept this one escape sequence */
986 } else { 979 } else {
987 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 980 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
988 } 981 }
989 break; 982 break;
990 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 983 #endif // !UCONFIG_ONLY_HTML_CONVERSION
991 984
992 default: 985 default:
993 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 986 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
994 break; 987 break;
995 } 988 }
996 } 989 }
997 if(U_SUCCESS(*err)) { 990 if(U_SUCCESS(*err)) {
998 _this->toULength = 0; 991 _this->toULength = 0;
999 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { 992 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
1000 if(_this->toULength>1) { 993 if(_this->toULength>1) {
(...skipping 22 matching lines...) Expand all
1023 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng th); 1016 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng th);
1024 *source-=bytesFromThisBuffer; 1017 *source-=bytesFromThisBuffer;
1025 } 1018 }
1026 _this->toULength=1; 1019 _this->toULength=1;
1027 } 1020 }
1028 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { 1021 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
1029 _this->toUCallbackReason = UCNV_UNASSIGNED; 1022 _this->toUCallbackReason = UCNV_UNASSIGNED;
1030 } 1023 }
1031 } 1024 }
1032 1025
1026 #if !UCONFIG_ONLY_HTML_CONVERSION
1033 /*Checks the characters of the buffer against valid 2022 escape sequences 1027 /*Checks the characters of the buffer against valid 2022 escape sequences
1034 *if the match we return a pointer to the initial start of the sequence otherwise 1028 *if the match we return a pointer to the initial start of the sequence otherwise
1035 *we return sourceLimit 1029 *we return sourceLimit
1036 */ 1030 */
1037 /*for 2022 looks ahead in the stream 1031 /*for 2022 looks ahead in the stream
1038 *to determine the longest possible convertible 1032 *to determine the longest possible convertible
1039 *data stream 1033 *data stream
1040 */ 1034 */
1041 static inline const char* 1035 static inline const char*
1042 getEndOfBuffer_2022(const char** source, 1036 getEndOfBuffer_2022(const char** source,
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
1077 }while (++mySource < sourceLimit); 1071 }while (++mySource < sourceLimit);
1078 1072
1079 return sourceLimit; 1073 return sourceLimit;
1080 #else 1074 #else
1081 while(mySource < sourceLimit && *mySource != ESC_2022) { 1075 while(mySource < sourceLimit && *mySource != ESC_2022) {
1082 ++mySource; 1076 ++mySource;
1083 } 1077 }
1084 return mySource; 1078 return mySource;
1085 #endif 1079 #endif
1086 } 1080 }
1087 1081 #endif
1088 1082
1089 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmb cs.c 1083 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmb cs.c
1090 * any future change in _MBCSFromUChar32() function should be reflected here. 1084 * any future change in _MBCSFromUChar32() function should be reflected here.
1091 * @return number of bytes in *value; negative number if fallback; 0 if no mappi ng 1085 * @return number of bytes in *value; negative number if fallback; 0 if no mappi ng
1092 */ 1086 */
1093 static inline int32_t 1087 static inline int32_t
1094 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, 1088 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
1095 UChar32 c, 1089 UChar32 c,
1096 uint32_t* value, 1090 uint32_t* value,
1097 UBool useFallback, 1091 UBool useFallback,
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after
1401 * KSC5601 : alias to ibm-949 mapping table 1395 * KSC5601 : alias to ibm-949 mapping table
1402 * GB2312 : alias to ibm-1386 mapping table 1396 * GB2312 : alias to ibm-1386 mapping table
1403 * ISO-8859-1 : Algorithmic implemented as LATIN1 case 1397 * ISO-8859-1 : Algorithmic implemented as LATIN1 case
1404 * ISO-8859-7 : alisas to ibm-9409 mapping table 1398 * ISO-8859-7 : alisas to ibm-9409 mapping table
1405 */ 1399 */
1406 1400
1407 /* preference order of JP charsets */ 1401 /* preference order of JP charsets */
1408 static const StateEnum jpCharsetPref[]={ 1402 static const StateEnum jpCharsetPref[]={
1409 ASCII, 1403 ASCII,
1410 JISX201, 1404 JISX201,
1411 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1412 ISO8859_1, 1405 ISO8859_1,
1413 ISO8859_7, 1406 ISO8859_7,
1414 #endif
1415 JISX208, 1407 JISX208,
1416 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1417 JISX212, 1408 JISX212,
1418 GB2312, 1409 GB2312,
1419 KSC5601, 1410 KSC5601,
1420 #endif
1421 HWKANA_7BIT 1411 HWKANA_7BIT
1422 }; 1412 };
1423 1413
1424 /* 1414 /*
1425 * The escape sequences must be in order of the enum constants like JISX201 = 3 , 1415 * The escape sequences must be in order of the enum constants like JISX201 = 3 ,
1426 * not in order of jpCharsetPref[]! 1416 * not in order of jpCharsetPref[]!
1427 */ 1417 */
1428 static const char escSeqChars[][6] ={ 1418 static const char escSeqChars[][6] ={
1429 "\x1B\x28\x42", /* <ESC>(B ASCII */ 1419 "\x1B\x28\x42", /* <ESC>(B ASCII */
1430 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ 1420 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after
1780 int8_t cs0 = choices[i]; 1770 int8_t cs0 = choices[i];
1781 switch(cs0) { 1771 switch(cs0) {
1782 case ASCII: 1772 case ASCII:
1783 if(sourceChar <= 0x7f) { 1773 if(sourceChar <= 0x7f) {
1784 targetValue = (uint32_t)sourceChar; 1774 targetValue = (uint32_t)sourceChar;
1785 len = 1; 1775 len = 1;
1786 cs = cs0; 1776 cs = cs0;
1787 g = 0; 1777 g = 0;
1788 } 1778 }
1789 break; 1779 break;
1790 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1791 case ISO8859_1: 1780 case ISO8859_1:
1792 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 1781 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1793 targetValue = (uint32_t)sourceChar - 0x80; 1782 targetValue = (uint32_t)sourceChar - 0x80;
1794 len = 1; 1783 len = 1;
1795 cs = cs0; 1784 cs = cs0;
1796 g = 2; 1785 g = 2;
1797 } 1786 }
1798 break; 1787 break;
1799 #endif
1800 case HWKANA_7BIT: 1788 case HWKANA_7BIT:
1801 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) { 1789 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) {
1802 if(converterData->version==3) { 1790 if(converterData->version==3) {
1803 /* JIS7: use G1 (SO) */ 1791 /* JIS7: use G1 (SO) */
1804 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ 1792 /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1805 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); 1793 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
1806 len = 1; 1794 len = 1;
1807 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ 1795 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
1808 g = 1; 1796 g = 1;
1809 } else if(converterData->version==4) { 1797 } else if(converterData->version==4) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1851 } 1839 }
1852 } else if(len == 0 && useFallback && 1840 } else if(len == 0 && useFallback &&
1853 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) { 1841 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) {
1854 targetValue = hwkana_fb[sourceChar - HWKANA_START]; 1842 targetValue = hwkana_fb[sourceChar - HWKANA_START];
1855 len = -2; 1843 len = -2;
1856 cs = cs0; 1844 cs = cs0;
1857 g = 0; 1845 g = 0;
1858 useFallback = FALSE; 1846 useFallback = FALSE;
1859 } 1847 }
1860 break; 1848 break;
1861 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1862 case ISO8859_7: 1849 case ISO8859_7:
1863 /* G0 SBCS forced to 7-bit output */ 1850 /* G0 SBCS forced to 7-bit output */
1864 len2 = MBCS_SINGLE_FROM_UCHAR32( 1851 len2 = MBCS_SINGLE_FROM_UCHAR32(
1865 converterData->myConverterArray[cs0], 1852 converterData->myConverterArray[cs0],
1866 sourceChar, &value, 1853 sourceChar, &value,
1867 useFallback); 1854 useFallback);
1868 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) { 1855 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) {
1869 targetValue = value - 0x80; 1856 targetValue = value - 0x80;
1870 len = len2; 1857 len = len2;
1871 cs = cs0; 1858 cs = cs0;
1872 g = 2; 1859 g = 2;
1873 useFallback = FALSE; 1860 useFallback = FALSE;
1874 } 1861 }
1875 break; 1862 break;
1876 #endif
1877 default: 1863 default:
1878 /* G0 DBCS */ 1864 /* G0 DBCS */
1879 len2 = MBCS_FROM_UCHAR32_ISO2022( 1865 len2 = MBCS_FROM_UCHAR32_ISO2022(
1880 converterData->myConverterArray[cs0], 1866 converterData->myConverterArray[cs0],
1881 sourceChar, &value, 1867 sourceChar, &value,
1882 useFallback, MBCS_OUTPUT_2); 1868 useFallback, MBCS_OUTPUT_2);
1883 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1869 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1884 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1885 if(cs0 == KSC5601) { 1870 if(cs0 == KSC5601) {
1886 /* 1871 /*
1887 * Check for valid bytes for the encoding scheme. 1872 * Check for valid bytes for the encoding scheme.
1888 * This is necessary because the sub-converter (windows-949) 1873 * This is necessary because the sub-converter (windows-949)
1889 * has a broader encoding scheme than is valid for 2022. 1874 * has a broader encoding scheme than is valid for 2022.
1890 */ 1875 */
1891 value = _2022FromGR94DBCS(value); 1876 value = _2022FromGR94DBCS(value);
1892 if(value == 0) { 1877 if(value == 0) {
1893 break; 1878 break;
1894 } 1879 }
1895 } 1880 }
1896 #endif
1897 targetValue = value; 1881 targetValue = value;
1898 len = len2; 1882 len = len2;
1899 cs = cs0; 1883 cs = cs0;
1900 g = 0; 1884 g = 0;
1901 useFallback = FALSE; 1885 useFallback = FALSE;
1902 } 1886 }
1903 break; 1887 break;
1904 } 1888 }
1905 } 1889 }
1906 1890
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
2180 /* return from a single-shift state to the previous one */ 2164 /* return from a single-shift state to the previous one */
2181 if(pToU2022State->g >= 2) { 2165 if(pToU2022State->g >= 2) {
2182 pToU2022State->g=pToU2022State->prevG; 2166 pToU2022State->g=pToU2022State->prevG;
2183 } 2167 }
2184 } else switch(cs) { 2168 } else switch(cs) {
2185 case ASCII: 2169 case ASCII:
2186 if(mySourceChar <= 0x7f) { 2170 if(mySourceChar <= 0x7f) {
2187 targetUniChar = mySourceChar; 2171 targetUniChar = mySourceChar;
2188 } 2172 }
2189 break; 2173 break;
2190 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2191 case ISO8859_1: 2174 case ISO8859_1:
2192 if(mySourceChar <= 0x7f) { 2175 if(mySourceChar <= 0x7f) {
2193 targetUniChar = mySourceChar + 0x80; 2176 targetUniChar = mySourceChar + 0x80;
2194 } 2177 }
2195 /* return from a single-shift state to the previous one */ 2178 /* return from a single-shift state to the previous one */
2196 pToU2022State->g=pToU2022State->prevG; 2179 pToU2022State->g=pToU2022State->prevG;
2197 break; 2180 break;
2198 case ISO8859_7: 2181 case ISO8859_7:
2199 if(mySourceChar <= 0x7f) { 2182 if(mySourceChar <= 0x7f) {
2200 /* convert mySourceChar+0x80 to use a normal 8-bit table */ 2183 /* convert mySourceChar+0x80 to use a normal 8-bit table */
2201 targetUniChar = 2184 targetUniChar =
2202 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( 2185 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
2203 myData->myConverterArray[cs], 2186 myData->myConverterArray[cs],
2204 mySourceChar + 0x80); 2187 mySourceChar + 0x80);
2205 } 2188 }
2206 /* return from a single-shift state to the previous one */ 2189 /* return from a single-shift state to the previous one */
2207 pToU2022State->g=pToU2022State->prevG; 2190 pToU2022State->g=pToU2022State->prevG;
2208 break; 2191 break;
2209 #endif
2210 case JISX201: 2192 case JISX201:
2211 if(mySourceChar <= 0x7f) { 2193 if(mySourceChar <= 0x7f) {
2212 targetUniChar = jisx201ToU(mySourceChar); 2194 targetUniChar = jisx201ToU(mySourceChar);
2213 } 2195 }
2214 break; 2196 break;
2215 case HWKANA_7BIT: 2197 case HWKANA_7BIT:
2216 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { 2198 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
2217 /* 7-bit halfwidth Katakana */ 2199 /* 7-bit halfwidth Katakana */
2218 targetUniChar = mySourceChar + (HWKANA_START - 0x21); 2200 targetUniChar = mySourceChar + (HWKANA_START - 0x21);
2219 } 2201 }
(...skipping 19 matching lines...) Expand all
2239 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ; 2221 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ;
2240 if (leadIsOk && trailIsOk) { 2222 if (leadIsOk && trailIsOk) {
2241 ++mySource; 2223 ++mySource;
2242 tmpSourceChar = (mySourceChar << 8) | trailByte; 2224 tmpSourceChar = (mySourceChar << 8) | trailByte;
2243 if(cs == JISX208) { 2225 if(cs == JISX208) {
2244 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf); 2226 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf);
2245 mySourceChar = tmpSourceChar; 2227 mySourceChar = tmpSourceChar;
2246 } else { 2228 } else {
2247 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ 2229 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
2248 mySourceChar = tmpSourceChar; 2230 mySourceChar = tmpSourceChar;
2249 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2250 if (cs == KSC5601) { 2231 if (cs == KSC5601) {
2251 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */ 2232 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */
2252 } 2233 }
2253 #endif
2254 tempBuf[0] = (char)(tmpSourceChar >> 8); 2234 tempBuf[0] = (char)(tmpSourceChar >> 8);
2255 tempBuf[1] = (char)(tmpSourceChar); 2235 tempBuf[1] = (char)(tmpSourceChar);
2256 } 2236 }
2257 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE); 2237 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE);
2258 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 2238 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
2259 /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 2239 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
2260 ++mySource; 2240 ++mySource;
2261 /* add another bit so that the code below writes 2 bytes in case of error */ 2241 /* add another bit so that the code below writes 2 bytes in case of error */
2262 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte; 2242 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte;
2263 } 2243 }
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2305 *err =U_BUFFER_OVERFLOW_ERROR; 2285 *err =U_BUFFER_OVERFLOW_ERROR;
2306 break; 2286 break;
2307 } 2287 }
2308 } 2288 }
2309 endloop: 2289 endloop:
2310 args->target = myTarget; 2290 args->target = myTarget;
2311 args->source = mySource; 2291 args->source = mySource;
2312 } 2292 }
2313 2293
2314 2294
2315 #if !UCONFIG_NO_NON_HTML5_CONVERSION 2295 #if !UCONFIG_ONLY_HTML_CONVERSION
2316 /*************************************************************** 2296 /***************************************************************
2317 * Rules for ISO-2022-KR encoding 2297 * Rules for ISO-2022-KR encoding
2318 * i) The KSC5601 designator sequence should appear only once in a file, 2298 * i) The KSC5601 designator sequence should appear only once in a file,
2319 * at the beginning of a line before any KSC5601 characters. This usually 2299 * at the beginning of a line before any KSC5601 characters. This usually
2320 * means that it appears by itself on the first line of the file 2300 * means that it appears by itself on the first line of the file
2321 * ii) There are only 2 shifting sequences SO to shift into double byte mode 2301 * ii) There are only 2 shifting sequences SO to shift into double byte mode
2322 * and SI to shift into single byte mode 2302 * and SI to shift into single byte mode
2323 */ 2303 */
2324 static void 2304 static void
2325 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2305 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after
3449 } 3429 }
3450 else{ 3430 else{
3451 *err =U_BUFFER_OVERFLOW_ERROR; 3431 *err =U_BUFFER_OVERFLOW_ERROR;
3452 break; 3432 break;
3453 } 3433 }
3454 } 3434 }
3455 endloop: 3435 endloop:
3456 args->target = myTarget; 3436 args->target = myTarget;
3457 args->source = mySource; 3437 args->source = mySource;
3458 } 3438 }
3459 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 3439 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3460 3440
3461 static void 3441 static void
3462 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) { 3442 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) {
3463 UConverter *cnv = args->converter; 3443 UConverter *cnv = args->converter;
3464 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo; 3444 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo;
3465 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; 3445 ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3466 char *p, *subchar; 3446 char *p, *subchar;
3467 char buffer[8]; 3447 char buffer[8];
3468 int32_t length; 3448 int32_t length;
3469 3449
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
3651 #endif 3631 #endif
3652 3632
3653 cnvData = (UConverterDataISO2022*)cnv->extraInfo; 3633 cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3654 3634
3655 /* open a set and initialize it with code points that are algorithmically round-tripped */ 3635 /* open a set and initialize it with code points that are algorithmically round-tripped */
3656 switch(cnvData->locale[0]){ 3636 switch(cnvData->locale[0]){
3657 case 'j': 3637 case 'j':
3658 /* include JIS X 0201 which is hardcoded */ 3638 /* include JIS X 0201 which is hardcoded */
3659 sa->add(sa->set, 0xa5); 3639 sa->add(sa->set, 0xa5);
3660 sa->add(sa->set, 0x203e); 3640 sa->add(sa->set, 0x203e);
3661 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3662 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3641 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3663 /* include Latin-1 for some variants of JP */ 3642 /* include Latin-1 for some variants of JP */
3664 sa->addRange(sa->set, 0, 0xff); 3643 sa->addRange(sa->set, 0, 0xff);
3665 } else { 3644 } else {
3666 /* include ASCII for JP */ 3645 /* include ASCII for JP */
3667 sa->addRange(sa->set, 0, 0x7f); 3646 sa->addRange(sa->set, 0, 0x7f);
3668 } 3647 }
3669 #else
3670 /* include ASCII for JP */
3671 sa->addRange(sa->set, 0, 0x7f);
3672 #endif
3673 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) { 3648 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) {
3674 /* 3649 /*
3675 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0 3650 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0
3676 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) 3651 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3677 * use half-width Katakana. 3652 * use half-width Katakana.
3678 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) 3653 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3679 * half-width Katakana via the ESC ( I sequence. 3654 * half-width Katakana via the ESC ( I sequence.
3680 * However, we only emit (fromUnicode) half-width Katakana according to the 3655 * However, we only emit (fromUnicode) half-width Katakana according to the
3681 * definition of each variant. 3656 * definition of each variant.
3682 * 3657 *
3683 * When including fallbacks, 3658 * When including fallbacks,
3684 * we need to include half-width Katakana Unicode code points for all JP variants because 3659 * we need to include half-width Katakana Unicode code points for all JP variants because
3685 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 3660 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3686 */ 3661 */
3687 /* include half-width Katakana for JP */ 3662 /* include half-width Katakana for JP */
3688 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3663 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3689 } 3664 }
3690 break; 3665 break;
3691 #if !UCONFIG_NO_NON_HTML5_CONVERSION 3666 #if !UCONFIG_ONLY_HTML_CONVERSION
3692 case 'c': 3667 case 'c':
3693 case 'z': 3668 case 'z':
3694 /* include ASCII for CN */ 3669 /* include ASCII for CN */
3695 sa->addRange(sa->set, 0, 0x7f); 3670 sa->addRange(sa->set, 0, 0x7f);
3696 break; 3671 break;
3697 case 'k': 3672 case 'k':
3698 /* there is only one converter for KR, and it is not in the myConverterArray[] */ 3673 /* there is only one converter for KR, and it is not in the myConverterArray[] */
3699 cnvData->currentConverter->sharedData->impl->getUnicodeSet( 3674 cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3700 cnvData->currentConverter, sa, which, pErrorCode); 3675 cnvData->currentConverter, sa, which, pErrorCode);
3701 /* the loop over myConverterArray[] will simply not find another converter */ 3676 /* the loop over myConverterArray[] will simply not find another converter */
(...skipping 18 matching lines...) Expand all
3720 3695
3721 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 3696 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
3722 UConverterSetFilter filter; 3697 UConverterSetFilter filter;
3723 if(cnvData->myConverterArray[i]!=NULL) { 3698 if(cnvData->myConverterArray[i]!=NULL) {
3724 if(cnvData->locale[0]=='j' && i==JISX208) { 3699 if(cnvData->locale[0]=='j' && i==JISX208) {
3725 /* 3700 /*
3726 * Only add code points that map to Shift-JIS codes 3701 * Only add code points that map to Shift-JIS codes
3727 * corresponding to JIS X 0208. 3702 * corresponding to JIS X 0208.
3728 */ 3703 */
3729 filter=UCNV_SET_FILTER_SJIS; 3704 filter=UCNV_SET_FILTER_SJIS;
3730 #if !UCONFIG_NO_NON_HTML5_CONVERSION 3705 #if !UCONFIG_ONLY_HTML_CONVERSION
3731 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3706 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3732 cnvData->version==0 && i==CNS_11643) { 3707 cnvData->version==0 && i==CNS_11643) {
3733 /* 3708 /*
3734 * Version-specific for CN: 3709 * Version-specific for CN:
3735 * CN version 0 does not map CNS planes 3..7 although 3710 * CN version 0 does not map CNS planes 3..7 although
3736 * they are all available in the CNS conversion table; 3711 * they are all available in the CNS conversion table;
3737 * CN version 1 (-EXT) does map them all. 3712 * CN version 1 (-EXT) does map them all.
3738 * The two versions create different Unicode sets. 3713 * The two versions create different Unicode sets.
3739 */ 3714 */
3740 filter=UCNV_SET_FILTER_2022_CN; 3715 filter=UCNV_SET_FILTER_2022_CN;
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
3859 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3834 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3860 }; 3835 };
3861 3836
3862 namespace { 3837 namespace {
3863 3838
3864 const UConverterSharedData _ISO2022JPData= 3839 const UConverterSharedData _ISO2022JPData=
3865 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022J PImpl); 3840 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022J PImpl);
3866 3841
3867 } // namespace 3842 } // namespace
3868 3843
3869 #if !UCONFIG_NO_NON_HTML5_CONVERSION 3844 #if !UCONFIG_ONLY_HTML_CONVERSION
3870 /************* KR ***************/ 3845 /************* KR ***************/
3871 static const UConverterImpl _ISO2022KRImpl={ 3846 static const UConverterImpl _ISO2022KRImpl={
3872 UCNV_ISO_2022, 3847 UCNV_ISO_2022,
3873 3848
3874 NULL, 3849 NULL,
3875 NULL, 3850 NULL,
3876 3851
3877 _ISO2022Open, 3852 _ISO2022Open,
3878 _ISO2022Close, 3853 _ISO2022Close,
3879 _ISO2022Reset, 3854 _ISO2022Reset,
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
3960 0, 3935 0,
3961 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3936 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3962 }; 3937 };
3963 3938
3964 namespace { 3939 namespace {
3965 3940
3966 const UConverterSharedData _ISO2022CNData= 3941 const UConverterSharedData _ISO2022CNData=
3967 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022C NImpl); 3942 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022C NImpl);
3968 3943
3969 } // namespace 3944 } // namespace
3970 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 3945 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3971 3946
3972 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 3947 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
OLDNEW
« no previous file with comments | « patches/uconv.patch ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698