Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(354)

Side by Side Diff: source/common/ucnv2022.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/ucmndata.h ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2000-2012, International Business Machines 3 * Copyright (C) 2000-2014, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 * file name: ucnv2022.cpp 6 * file name: ucnv2022.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
11 * created on: 2000feb03 11 * created on: 2000feb03
12 * created by: Markus W. Scherer 12 * created by: Markus W. Scherer
13 * 13 *
(...skipping 22 matching lines...) Expand all
36 #include "unicode/ucnv_cb.h" 36 #include "unicode/ucnv_cb.h"
37 #include "unicode/utf16.h" 37 #include "unicode/utf16.h"
38 #include "ucnv_imp.h" 38 #include "ucnv_imp.h"
39 #include "ucnv_bld.h" 39 #include "ucnv_bld.h"
40 #include "ucnv_cnv.h" 40 #include "ucnv_cnv.h"
41 #include "ucnvmbcs.h" 41 #include "ucnvmbcs.h"
42 #include "cstring.h" 42 #include "cstring.h"
43 #include "cmemory.h" 43 #include "cmemory.h"
44 #include "uassert.h" 44 #include "uassert.h"
45 45
46 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
47
48 #ifdef U_ENABLE_GENERIC_ISO_2022 46 #ifdef U_ENABLE_GENERIC_ISO_2022
49 /* 47 /*
50 * I am disabling the generic ISO-2022 converter after proposing to do so on 48 * I am disabling the generic ISO-2022 converter after proposing to do so on
51 * the icu mailing list two days ago. 49 * the icu mailing list two days ago.
52 * 50 *
53 * Reasons: 51 * Reasons:
54 * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of 52 * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
55 * its designation sequences, single shifts with return to the previous state , 53 * its designation sequences, single shifts with return to the previous state ,
56 * switch-with-no-return to UTF-16BE or similar, etc. 54 * switch-with-no-return to UTF-16BE or similar, etc.
57 * This is unlike the language-specific variants like ISO-2022-JP which 55 * This is unlike the language-specific variants like ISO-2022-JP which
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 CNS_11643_1, 145 CNS_11643_1,
148 CNS_11643_2, 146 CNS_11643_2,
149 CNS_11643_3, 147 CNS_11643_3,
150 CNS_11643_4, 148 CNS_11643_4,
151 CNS_11643_5, 149 CNS_11643_5,
152 CNS_11643_6, 150 CNS_11643_6,
153 CNS_11643_7 151 CNS_11643_7
154 } StateEnum; 152 } StateEnum;
155 153
156 /* is the StateEnum charset value for a DBCS charset? */ 154 /* is the StateEnum charset value for a DBCS charset? */
157 #if UCONFIG_NO_NON_HTML5_CONVERSION
158 #define IS_JP_DBCS(cs) (JISX208==(cs))
159 #else
160 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
161 #endif
162 156
163 #define CSM(cs) ((uint16_t)1<<(cs)) 157 #define CSM(cs) ((uint16_t)1<<(cs))
164 158
165 /* 159 /*
166 * Each of these charset masks (with index x) contains a bit for a charset in ex act correspondence 160 * Each of these charset masks (with index x) contains a bit for a charset in ex act correspondence
167 * to whether that charset is used in the corresponding version x of ISO_2022,lo cale=ja,version=x 161 * to whether that charset is used in the corresponding version x of ISO_2022,lo cale=ja,version=x
168 * 162 *
169 * Note: The converter uses some leniency: 163 * Note: The converter uses some leniency:
170 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in 164 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
171 * all versions, not just JIS7 and JIS8. 165 * all versions, not just JIS7 and JIS8.
172 * - ICU does not distinguish between different versions of JIS X 0208. 166 * - ICU does not distinguish between different versions of JIS X 0208.
173 */ 167 */
174 #if UCONFIG_NO_NON_HTML5_CONVERSION
175 enum { MAX_JA_VERSION=0 };
176 #else
177 enum { MAX_JA_VERSION=4 }; 168 enum { MAX_JA_VERSION=4 };
178 #endif
179 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
180 /*
181 * TODO(jshin): The encoding spec has JISX212, but we don't support it.
182 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
183 */
184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 170 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
185 #if !UCONFIG_NO_NON_HTML5_CONVERSION
186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 171 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 172 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
188 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 173 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
189 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 174 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231 2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
190 #endif
191 }; 175 };
192 176
193 typedef enum { 177 typedef enum {
194 ASCII1=0, 178 ASCII1=0,
195 LATIN1, 179 LATIN1,
196 SBCS, 180 SBCS,
197 DBCS, 181 DBCS,
198 MBCS, 182 MBCS,
199 HWKANA 183 HWKANA
200 }Cnv2022Type; 184 }Cnv2022Type;
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after
367 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_ 2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 351 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_ 2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
368 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 352 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
369 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T ERMINAL_2022 ,VALID_TERMINAL_2022 353 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T ERMINAL_2022 ,VALID_TERMINAL_2022
370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 354 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 355 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 356 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
373 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022 357 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_ 2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI NAL_2022 ,VALID_TERMINAL_2022
374 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 358 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
375 }; 359 };
376 360
361
377 /* Type def for refactoring changeState_2022 code*/ 362 /* Type def for refactoring changeState_2022 code*/
378 typedef enum{ 363 typedef enum{
379 #ifdef U_ENABLE_GENERIC_ISO_2022 364 #ifdef U_ENABLE_GENERIC_ISO_2022
380 ISO_2022=0, 365 ISO_2022=0,
381 #endif 366 #endif
382 #if UCONFIG_NO_NON_HTML5_CONVERSION
383 ISO_2022_JP=1
384 #else
385 ISO_2022_JP=1, 367 ISO_2022_JP=1,
386 ISO_2022_KR=2, 368 ISO_2022_KR=2,
387 ISO_2022_CN=3 369 ISO_2022_CN=3
388 #endif
389 } Variant2022; 370 } Variant2022;
390 371
391 /*********** ISO 2022 Converter Protos ***********/ 372 /*********** ISO 2022 Converter Protos ***********/
392 static void 373 static void
393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 374 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
394 375
395 static void 376 static void
396 _ISO2022Close(UConverter *converter); 377 _ISO2022Close(UConverter *converter);
397 378
398 static void 379 static void
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
495 myConverterData->version = version; 476 myConverterData->version = version;
496 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && 477 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
497 (myLocale[2]=='_' || myLocale[2]=='\0')) 478 (myLocale[2]=='_' || myLocale[2]=='\0'))
498 { 479 {
499 size_t len=0; 480 size_t len=0;
500 /* open the required converters and cache them */ 481 /* open the required converters and cache them */
501 if(version>MAX_JA_VERSION) { 482 if(version>MAX_JA_VERSION) {
502 /* prevent indexing beyond jpCharsetMasks[] */ 483 /* prevent indexing beyond jpCharsetMasks[] */
503 myConverterData->version = version = 0; 484 myConverterData->version = version = 0;
504 } 485 }
505 #if !UCONFIG_NO_NON_HTML5_CONVERSION
506 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 486 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
507 myConverterData->myConverterArray[ISO8859_7] = 487 myConverterData->myConverterArray[ISO8859_7] =
508 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode); 488 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e rrorCode);
509 } 489 }
510 #endif
511 myConverterData->myConverterArray[JISX208] = 490 myConverterData->myConverterArray[JISX208] =
512 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code); 491 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error Code);
513 #if !UCONFIG_NO_NON_HTML5_CONVERSION
514 if(jpCharsetMasks[version]&CSM(JISX212)) { 492 if(jpCharsetMasks[version]&CSM(JISX212)) {
515 myConverterData->myConverterArray[JISX212] = 493 myConverterData->myConverterArray[JISX212] =
516 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode); 494 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er rorCode);
517 } 495 }
518 if(jpCharsetMasks[version]&CSM(GB2312)) { 496 if(jpCharsetMasks[version]&CSM(GB2312)) {
519 myConverterData->myConverterArray[GB2312] = 497 myConverterData->myConverterArray[GB2312] =
520 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */ 498 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er rorCode); /* gb_2312_80-1 */
521 } 499 }
522 if(jpCharsetMasks[version]&CSM(KSC5601)) { 500 if(jpCharsetMasks[version]&CSM(KSC5601)) {
523 myConverterData->myConverterArray[KSC5601] = 501 myConverterData->myConverterArray[KSC5601] =
524 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode); 502 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er rorCode);
525 } 503 }
526 #endif
527 504
528 /* set the function pointers to appropriate funtions */ 505 /* set the function pointers to appropriate funtions */
529 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 506 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
530 uprv_strcpy(myConverterData->locale,"ja"); 507 uprv_strcpy(myConverterData->locale,"ja");
531 508
532 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= "); 509 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version= ");
533 len = uprv_strlen(myConverterData->name); 510 len = uprv_strlen(myConverterData->name);
534 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ; 511 myConverterData->name[len]=(char)(myConverterData->version+(int)'0') ;
535 myConverterData->name[len+1]='\0'; 512 myConverterData->name[len+1]='\0';
536 } 513 }
537 #if !UCONFIG_NO_NON_HTML5_CONVERSION
538 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 514 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
539 (myLocale[2]=='_' || myLocale[2]=='\0')) 515 (myLocale[2]=='_' || myLocale[2]=='\0'))
540 { 516 {
541 const char *cnvName; 517 const char *cnvName;
542 if(version==1) { 518 if(version==1) {
543 cnvName="icu-internal-25546"; 519 cnvName="icu-internal-25546";
544 } else { 520 } else {
545 cnvName="ibm-949"; 521 cnvName="ibm-949";
546 myConverterData->version=version=0; 522 myConverterData->version=version=0;
547 } 523 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
597 myConverterData->version = 0; 573 myConverterData->version = 0;
598 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0"); 574 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=0");
599 }else if (version==1){ 575 }else if (version==1){
600 myConverterData->version = 1; 576 myConverterData->version = 1;
601 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1"); 577 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=1");
602 }else { 578 }else {
603 myConverterData->version = 2; 579 myConverterData->version = 2;
604 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2"); 580 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers ion=2");
605 } 581 }
606 } 582 }
607 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION
608 else{ 583 else{
609 #ifdef U_ENABLE_GENERIC_ISO_2022 584 #ifdef U_ENABLE_GENERIC_ISO_2022
610 myConverterData->isFirstBuffer = TRUE; 585 myConverterData->isFirstBuffer = TRUE;
611 586
612 /* append the UTF-8 escape sequence */ 587 /* append the UTF-8 escape sequence */
613 cnv->charErrorBufferLength = 3; 588 cnv->charErrorBufferLength = 3;
614 cnv->charErrorBuffer[0] = 0x1b; 589 cnv->charErrorBuffer[0] = 0x1b;
615 cnv->charErrorBuffer[1] = 0x25; 590 cnv->charErrorBuffer[1] = 0x25;
616 cnv->charErrorBuffer[2] = 0x42; 591 cnv->charErrorBuffer[2] = 0x42;
617 592
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
732 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 707 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
733 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE 708 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA TE
734 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 709 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
735 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE 710 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA TE
736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 711 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 712 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
738 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 713 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
739 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 714 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
740 }; 715 };
741 716
742 #if !UCONFIG_NO_NON_HTML5_CONVERSION
743 /*************** to unicode *******************/ 717 /*************** to unicode *******************/
744 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 718 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
745 /* 0 1 2 3 4 5 6 7 8 9 */ 719 /* 0 1 2 3 4 5 6 7 8 9 */
746 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 720 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 721 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 722 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
749 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 723 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 724 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
751 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 725 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
752 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE 726 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA TE
753 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 727 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
754 }; 728 };
755 #endif
756 729
757 730
758 static UCNV_TableStates_2022 731 static UCNV_TableStates_2022
759 getKey_2022(char c,int32_t* key,int32_t* offset){ 732 getKey_2022(char c,int32_t* key,int32_t* offset){
760 int32_t togo; 733 int32_t togo;
761 int32_t low = 0; 734 int32_t low = 0;
762 int32_t hi = MAX_STATES_2022; 735 int32_t hi = MAX_STATES_2022;
763 int32_t oldmid=0; 736 int32_t oldmid=0;
764 737
765 togo = normalize_esq_chars_2022[(uint8_t)c]; 738 togo = normalize_esq_chars_2022[(uint8_t)c];
766 if(togo == 0) { 739 if(togo == 0) {
767 /* not a valid character anywhere in an escape sequence */ 740 /* not a valid character anywhere in an escape sequence */
768 *key = 0; 741 *key = 0;
769 *offset = 0; 742 *offset = 0;
770 return INVALID_2022; 743 return INVALID_2022;
771 } 744 }
772 togo = (*key << 5) + togo; 745 togo = (*key << 5) + togo;
773 746
774 while (hi != low) /*binary search*/{ 747 while (hi != low) /*binary search*/{
775 748
776 register int32_t mid = (hi+low) >> 1; /*Finds median*/ 749 int32_t mid = (hi+low) >> 1; /*Finds median*/
777 750
778 if (mid == oldmid) 751 if (mid == oldmid)
779 break; 752 break;
780 753
781 if (escSeqStateTable_Key_2022[mid] > togo){ 754 if (escSeqStateTable_Key_2022[mid] > togo){
782 hi = mid; 755 hi = mid;
783 } 756 }
784 else if (escSeqStateTable_Key_2022[mid] < togo){ 757 else if (escSeqStateTable_Key_2022[mid] < togo){
785 low = mid; 758 low = mid;
786 } 759 }
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
898 if(myData2022->toU2022State.g<2) { 871 if(myData2022->toU2022State.g<2) {
899 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g; 872 myData2022->toU2022State.prevG=myData2022->toU2022St ate.g;
900 } 873 }
901 myData2022->toU2022State.g=2; 874 myData2022->toU2022State.g=2;
902 } else { 875 } else {
903 /* illegal to have SS2 before a matching designator */ 876 /* illegal to have SS2 before a matching designator */
904 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 877 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
905 } 878 }
906 break; 879 break;
907 /* case SS3_STATE: not used in ISO-2022-JP-x */ 880 /* case SS3_STATE: not used in ISO-2022-JP-x */
908 #if !UCONFIG_NO_NON_HTML5_CONVERSION
909 case ISO8859_1: 881 case ISO8859_1:
910 case ISO8859_7: 882 case ISO8859_7:
911 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 883 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
912 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 884 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
913 } else { 885 } else {
914 /* G2 charset for SS2 */ 886 /* G2 charset for SS2 */
915 myData2022->toU2022State.cs[2]=(int8_t)tempState; 887 myData2022->toU2022State.cs[2]=(int8_t)tempState;
916 } 888 }
917 break; 889 break;
918 #endif
919 default: 890 default:
920 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 891 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
921 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 892 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
922 } else { 893 } else {
923 /* G0 charset */ 894 /* G0 charset */
924 myData2022->toU2022State.cs[0]=(int8_t)tempState; 895 myData2022->toU2022State.cs[0]=(int8_t)tempState;
925 } 896 }
926 break; 897 break;
927 } 898 }
928 } 899 }
929 break; 900 break;
930 #if !UCONFIG_NO_NON_HTML5_CONVERSION
931 case ISO_2022_CN: 901 case ISO_2022_CN:
932 { 902 {
933 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 903 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
934 switch(tempState) { 904 switch(tempState) {
935 case INVALID_STATE: 905 case INVALID_STATE:
936 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 906 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
937 break; 907 break;
938 case SS2_STATE: 908 case SS2_STATE:
939 if(myData2022->toU2022State.cs[2]!=0) { 909 if(myData2022->toU2022State.cs[2]!=0) {
940 if(myData2022->toU2022State.g<2) { 910 if(myData2022->toU2022State.g<2) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
982 } 952 }
983 } 953 }
984 break; 954 break;
985 case ISO_2022_KR: 955 case ISO_2022_KR:
986 if(offset==0x30){ 956 if(offset==0x30){
987 /* nothing to be done, just accept this one escape sequence */ 957 /* nothing to be done, just accept this one escape sequence */
988 } else { 958 } else {
989 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 959 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
990 } 960 }
991 break; 961 break;
992 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
993 962
994 default: 963 default:
995 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 964 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
996 break; 965 break;
997 } 966 }
998 } 967 }
999 if(U_SUCCESS(*err)) { 968 if(U_SUCCESS(*err)) {
1000 _this->toULength = 0; 969 _this->toULength = 0;
1001 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { 970 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
1002 if(_this->toULength>1) { 971 if(_this->toULength>1) {
(...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after
1403 * KSC5601 : alias to ibm-949 mapping table 1372 * KSC5601 : alias to ibm-949 mapping table
1404 * GB2312 : alias to ibm-1386 mapping table 1373 * GB2312 : alias to ibm-1386 mapping table
1405 * ISO-8859-1 : Algorithmic implemented as LATIN1 case 1374 * ISO-8859-1 : Algorithmic implemented as LATIN1 case
1406 * ISO-8859-7 : alisas to ibm-9409 mapping table 1375 * ISO-8859-7 : alisas to ibm-9409 mapping table
1407 */ 1376 */
1408 1377
1409 /* preference order of JP charsets */ 1378 /* preference order of JP charsets */
1410 static const StateEnum jpCharsetPref[]={ 1379 static const StateEnum jpCharsetPref[]={
1411 ASCII, 1380 ASCII,
1412 JISX201, 1381 JISX201,
1413 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1414 ISO8859_1, 1382 ISO8859_1,
1415 ISO8859_7, 1383 ISO8859_7,
1416 #endif
1417 JISX208, 1384 JISX208,
1418 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1419 JISX212, 1385 JISX212,
1420 GB2312, 1386 GB2312,
1421 KSC5601, 1387 KSC5601,
1422 #endif
1423 HWKANA_7BIT 1388 HWKANA_7BIT
1424 }; 1389 };
1425 1390
1426 /* 1391 /*
1427 * The escape sequences must be in order of the enum constants like JISX201 = 3 , 1392 * The escape sequences must be in order of the enum constants like JISX201 = 3 ,
1428 * not in order of jpCharsetPref[]! 1393 * not in order of jpCharsetPref[]!
1429 */ 1394 */
1430 static const char escSeqChars[][6] ={ 1395 static const char escSeqChars[][6] ={
1431 "\x1B\x28\x42", /* <ESC>(B ASCII */ 1396 "\x1B\x28\x42", /* <ESC>(B ASCII */
1432 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ 1397 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
(...skipping 312 matching lines...) Expand 10 before | Expand all | Expand 10 after
1745 choices[choiceCount++] = cs = pFromU2022State->cs[0]; 1710 choices[choiceCount++] = cs = pFromU2022State->cs[0];
1746 csm &= ~CSM(cs); 1711 csm &= ~CSM(cs);
1747 1712
1748 /* try the current G2 charset */ 1713 /* try the current G2 charset */
1749 if((cs = pFromU2022State->cs[2]) != 0) { 1714 if((cs = pFromU2022State->cs[2]) != 0) {
1750 choices[choiceCount++] = cs; 1715 choices[choiceCount++] = cs;
1751 csm &= ~CSM(cs); 1716 csm &= ~CSM(cs);
1752 } 1717 }
1753 1718
1754 /* try all the other possible charsets */ 1719 /* try all the other possible charsets */
1755 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) { 1720 for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) {
1756 cs = (int8_t)jpCharsetPref[i]; 1721 cs = (int8_t)jpCharsetPref[i];
1757 if(CSM(cs) & csm) { 1722 if(CSM(cs) & csm) {
1758 choices[choiceCount++] = cs; 1723 choices[choiceCount++] = cs;
1759 csm &= ~CSM(cs); 1724 csm &= ~CSM(cs);
1760 } 1725 }
1761 } 1726 }
1762 } 1727 }
1763 1728
1764 cs = g = 0; 1729 cs = g = 0;
1765 /* 1730 /*
(...skipping 16 matching lines...) Expand all
1782 int8_t cs0 = choices[i]; 1747 int8_t cs0 = choices[i];
1783 switch(cs0) { 1748 switch(cs0) {
1784 case ASCII: 1749 case ASCII:
1785 if(sourceChar <= 0x7f) { 1750 if(sourceChar <= 0x7f) {
1786 targetValue = (uint32_t)sourceChar; 1751 targetValue = (uint32_t)sourceChar;
1787 len = 1; 1752 len = 1;
1788 cs = cs0; 1753 cs = cs0;
1789 g = 0; 1754 g = 0;
1790 } 1755 }
1791 break; 1756 break;
1792 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1793 case ISO8859_1: 1757 case ISO8859_1:
1794 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 1758 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1795 targetValue = (uint32_t)sourceChar - 0x80; 1759 targetValue = (uint32_t)sourceChar - 0x80;
1796 len = 1; 1760 len = 1;
1797 cs = cs0; 1761 cs = cs0;
1798 g = 2; 1762 g = 2;
1799 } 1763 }
1800 break; 1764 break;
1801 #endif
1802 case HWKANA_7BIT: 1765 case HWKANA_7BIT:
1803 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) { 1766 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW KANA_START)) {
1804 if(converterData->version==3) { 1767 if(converterData->version==3) {
1805 /* JIS7: use G1 (SO) */ 1768 /* JIS7: use G1 (SO) */
1806 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ 1769 /* Shift U+FF61..U+FF9F to bytes 21..5F. */
1807 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); 1770 targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
1808 len = 1; 1771 len = 1;
1809 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ 1772 pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
1810 g = 1; 1773 g = 1;
1811 } else if(converterData->version==4) { 1774 } else if(converterData->version==4) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
1853 } 1816 }
1854 } else if(len == 0 && useFallback && 1817 } else if(len == 0 && useFallback &&
1855 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) { 1818 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E ND - HWKANA_START)) {
1856 targetValue = hwkana_fb[sourceChar - HWKANA_START]; 1819 targetValue = hwkana_fb[sourceChar - HWKANA_START];
1857 len = -2; 1820 len = -2;
1858 cs = cs0; 1821 cs = cs0;
1859 g = 0; 1822 g = 0;
1860 useFallback = FALSE; 1823 useFallback = FALSE;
1861 } 1824 }
1862 break; 1825 break;
1863 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1864 case ISO8859_7: 1826 case ISO8859_7:
1865 /* G0 SBCS forced to 7-bit output */ 1827 /* G0 SBCS forced to 7-bit output */
1866 len2 = MBCS_SINGLE_FROM_UCHAR32( 1828 len2 = MBCS_SINGLE_FROM_UCHAR32(
1867 converterData->myConverterArray[cs0], 1829 converterData->myConverterArray[cs0],
1868 sourceChar, &value, 1830 sourceChar, &value,
1869 useFallback); 1831 useFallback);
1870 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) { 1832 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val ue && value <= GR96_END) {
1871 targetValue = value - 0x80; 1833 targetValue = value - 0x80;
1872 len = len2; 1834 len = len2;
1873 cs = cs0; 1835 cs = cs0;
1874 g = 2; 1836 g = 2;
1875 useFallback = FALSE; 1837 useFallback = FALSE;
1876 } 1838 }
1877 break; 1839 break;
1878 #endif
1879 default: 1840 default:
1880 /* G0 DBCS */ 1841 /* G0 DBCS */
1881 len2 = MBCS_FROM_UCHAR32_ISO2022( 1842 len2 = MBCS_FROM_UCHAR32_ISO2022(
1882 converterData->myConverterArray[cs0], 1843 converterData->myConverterArray[cs0],
1883 sourceChar, &value, 1844 sourceChar, &value,
1884 useFallback, MBCS_OUTPUT_2); 1845 useFallback, MBCS_OUTPUT_2);
1885 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1846 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1886 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1887 if(cs0 == KSC5601) { 1847 if(cs0 == KSC5601) {
1888 /* 1848 /*
1889 * Check for valid bytes for the encoding scheme. 1849 * Check for valid bytes for the encoding scheme.
1890 * This is necessary because the sub-converter (windows-949) 1850 * This is necessary because the sub-converter (windows-949)
1891 * has a broader encoding scheme than is valid for 2022. 1851 * has a broader encoding scheme than is valid for 2022.
1892 */ 1852 */
1893 value = _2022FromGR94DBCS(value); 1853 value = _2022FromGR94DBCS(value);
1894 if(value == 0) { 1854 if(value == 0) {
1895 break; 1855 break;
1896 } 1856 }
1897 } 1857 }
1898 #endif
1899 targetValue = value; 1858 targetValue = value;
1900 len = len2; 1859 len = len2;
1901 cs = cs0; 1860 cs = cs0;
1902 g = 0; 1861 g = 0;
1903 useFallback = FALSE; 1862 useFallback = FALSE;
1904 } 1863 }
1905 break; 1864 break;
1906 } 1865 }
1907 } 1866 }
1908 1867
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
2182 /* return from a single-shift state to the previous one */ 2141 /* return from a single-shift state to the previous one */
2183 if(pToU2022State->g >= 2) { 2142 if(pToU2022State->g >= 2) {
2184 pToU2022State->g=pToU2022State->prevG; 2143 pToU2022State->g=pToU2022State->prevG;
2185 } 2144 }
2186 } else switch(cs) { 2145 } else switch(cs) {
2187 case ASCII: 2146 case ASCII:
2188 if(mySourceChar <= 0x7f) { 2147 if(mySourceChar <= 0x7f) {
2189 targetUniChar = mySourceChar; 2148 targetUniChar = mySourceChar;
2190 } 2149 }
2191 break; 2150 break;
2192 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2193 case ISO8859_1: 2151 case ISO8859_1:
2194 if(mySourceChar <= 0x7f) { 2152 if(mySourceChar <= 0x7f) {
2195 targetUniChar = mySourceChar + 0x80; 2153 targetUniChar = mySourceChar + 0x80;
2196 } 2154 }
2197 /* return from a single-shift state to the previous one */ 2155 /* return from a single-shift state to the previous one */
2198 pToU2022State->g=pToU2022State->prevG; 2156 pToU2022State->g=pToU2022State->prevG;
2199 break; 2157 break;
2200 case ISO8859_7: 2158 case ISO8859_7:
2201 if(mySourceChar <= 0x7f) { 2159 if(mySourceChar <= 0x7f) {
2202 /* convert mySourceChar+0x80 to use a normal 8-bit table */ 2160 /* convert mySourceChar+0x80 to use a normal 8-bit table */
2203 targetUniChar = 2161 targetUniChar =
2204 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( 2162 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
2205 myData->myConverterArray[cs], 2163 myData->myConverterArray[cs],
2206 mySourceChar + 0x80); 2164 mySourceChar + 0x80);
2207 } 2165 }
2208 /* return from a single-shift state to the previous one */ 2166 /* return from a single-shift state to the previous one */
2209 pToU2022State->g=pToU2022State->prevG; 2167 pToU2022State->g=pToU2022State->prevG;
2210 break; 2168 break;
2211 #endif
2212 case JISX201: 2169 case JISX201:
2213 if(mySourceChar <= 0x7f) { 2170 if(mySourceChar <= 0x7f) {
2214 targetUniChar = jisx201ToU(mySourceChar); 2171 targetUniChar = jisx201ToU(mySourceChar);
2215 } 2172 }
2216 break; 2173 break;
2217 case HWKANA_7BIT: 2174 case HWKANA_7BIT:
2218 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { 2175 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
2219 /* 7-bit halfwidth Katakana */ 2176 /* 7-bit halfwidth Katakana */
2220 targetUniChar = mySourceChar + (HWKANA_START - 0x21); 2177 targetUniChar = mySourceChar + (HWKANA_START - 0x21);
2221 } 2178 }
(...skipping 19 matching lines...) Expand all
2241 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ; 2198 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21) ;
2242 if (leadIsOk && trailIsOk) { 2199 if (leadIsOk && trailIsOk) {
2243 ++mySource; 2200 ++mySource;
2244 tmpSourceChar = (mySourceChar << 8) | trailByte; 2201 tmpSourceChar = (mySourceChar << 8) | trailByte;
2245 if(cs == JISX208) { 2202 if(cs == JISX208) {
2246 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf); 2203 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te mpBuf);
2247 mySourceChar = tmpSourceChar; 2204 mySourceChar = tmpSourceChar;
2248 } else { 2205 } else {
2249 /* Copy before we modify tmpSourceChar so toUnic odeCallback() sees the correct bytes. */ 2206 /* Copy before we modify tmpSourceChar so toUnic odeCallback() sees the correct bytes. */
2250 mySourceChar = tmpSourceChar; 2207 mySourceChar = tmpSourceChar;
2251 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2252 if (cs == KSC5601) { 2208 if (cs == KSC5601) {
2253 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */ 2209 tmpSourceChar += 0x8080; /* = _2022ToGR94DB CS(tmpSourceChar) */
2254 } 2210 }
2255 #endif
2256 tempBuf[0] = (char)(tmpSourceChar >> 8); 2211 tempBuf[0] = (char)(tmpSourceChar >> 8);
2257 tempBuf[1] = (char)(tmpSourceChar); 2212 tempBuf[1] = (char)(tmpSourceChar);
2258 } 2213 }
2259 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE); 2214 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData-> myConverterArray[cs], tempBuf, 2, FALSE);
2260 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 2215 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
2261 /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 2216 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
2262 ++mySource; 2217 ++mySource;
2263 /* add another bit so that the code below writes 2 b ytes in case of error */ 2218 /* add another bit so that the code below writes 2 b ytes in case of error */
2264 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte; 2219 mySourceChar = 0x10000 | (mySourceChar << 8) | trail Byte;
2265 } 2220 }
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2307 *err =U_BUFFER_OVERFLOW_ERROR; 2262 *err =U_BUFFER_OVERFLOW_ERROR;
2308 break; 2263 break;
2309 } 2264 }
2310 } 2265 }
2311 endloop: 2266 endloop:
2312 args->target = myTarget; 2267 args->target = myTarget;
2313 args->source = mySource; 2268 args->source = mySource;
2314 } 2269 }
2315 2270
2316 2271
2317 #if !UCONFIG_NO_NON_HTML5_CONVERSION
2318 /*************************************************************** 2272 /***************************************************************
2319 * Rules for ISO-2022-KR encoding 2273 * Rules for ISO-2022-KR encoding
2320 * i) The KSC5601 designator sequence should appear only once in a file, 2274 * i) The KSC5601 designator sequence should appear only once in a file,
2321 * at the beginning of a line before any KSC5601 characters. This usually 2275 * at the beginning of a line before any KSC5601 characters. This usually
2322 * means that it appears by itself on the first line of the file 2276 * means that it appears by itself on the first line of the file
2323 * ii) There are only 2 shifting sequences SO to shift into double byte mode 2277 * ii) There are only 2 shifting sequences SO to shift into double byte mode
2324 * and SI to shift into single byte mode 2278 * and SI to shift into single byte mode
2325 */ 2279 */
2326 static void 2280 static void
2327 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2281 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after
3451 } 3405 }
3452 else{ 3406 else{
3453 *err =U_BUFFER_OVERFLOW_ERROR; 3407 *err =U_BUFFER_OVERFLOW_ERROR;
3454 break; 3408 break;
3455 } 3409 }
3456 } 3410 }
3457 endloop: 3411 endloop:
3458 args->target = myTarget; 3412 args->target = myTarget;
3459 args->source = mySource; 3413 args->source = mySource;
3460 } 3414 }
3461 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
3462 3415
3463 static void 3416 static void
3464 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) { 3417 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC ode *err) {
3465 UConverter *cnv = args->converter; 3418 UConverter *cnv = args->converter;
3466 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo; 3419 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI nfo;
3467 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; 3420 ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3468 char *p, *subchar; 3421 char *p, *subchar;
3469 char buffer[8]; 3422 char buffer[8];
3470 int32_t length; 3423 int32_t length;
3471 3424
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
3653 #endif 3606 #endif
3654 3607
3655 cnvData = (UConverterDataISO2022*)cnv->extraInfo; 3608 cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3656 3609
3657 /* open a set and initialize it with code points that are algorithmically round-tripped */ 3610 /* open a set and initialize it with code points that are algorithmically round-tripped */
3658 switch(cnvData->locale[0]){ 3611 switch(cnvData->locale[0]){
3659 case 'j': 3612 case 'j':
3660 /* include JIS X 0201 which is hardcoded */ 3613 /* include JIS X 0201 which is hardcoded */
3661 sa->add(sa->set, 0xa5); 3614 sa->add(sa->set, 0xa5);
3662 sa->add(sa->set, 0x203e); 3615 sa->add(sa->set, 0x203e);
3663 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3664 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3616 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3665 /* include Latin-1 for some variants of JP */ 3617 /* include Latin-1 for some variants of JP */
3666 sa->addRange(sa->set, 0, 0xff); 3618 sa->addRange(sa->set, 0, 0xff);
3667 } else { 3619 } else {
3668 /* include ASCII for JP */ 3620 /* include ASCII for JP */
3669 sa->addRange(sa->set, 0, 0x7f); 3621 sa->addRange(sa->set, 0, 0x7f);
3670 } 3622 }
3671 #else
3672 /* include ASCII for JP */
3673 sa->addRange(sa->set, 0, 0x7f);
3674 #endif
3675 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) { 3623 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) {
3676 /* 3624 /*
3677 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0 3625 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!= 0
3678 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) 3626 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3679 * use half-width Katakana. 3627 * use half-width Katakana.
3680 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) 3628 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3681 * half-width Katakana via the ESC ( I sequence. 3629 * half-width Katakana via the ESC ( I sequence.
3682 * However, we only emit (fromUnicode) half-width Katakana according to the 3630 * However, we only emit (fromUnicode) half-width Katakana according to the
3683 * definition of each variant. 3631 * definition of each variant.
3684 * 3632 *
3685 * When including fallbacks, 3633 * When including fallbacks,
3686 * we need to include half-width Katakana Unicode code points for all JP variants because 3634 * we need to include half-width Katakana Unicode code points for all JP variants because
3687 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 3635 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3688 */ 3636 */
3689 /* include half-width Katakana for JP */ 3637 /* include half-width Katakana for JP */
3690 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3638 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3691 } 3639 }
3692 break; 3640 break;
3693 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3694 case 'c': 3641 case 'c':
3695 case 'z': 3642 case 'z':
3696 /* include ASCII for CN */ 3643 /* include ASCII for CN */
3697 sa->addRange(sa->set, 0, 0x7f); 3644 sa->addRange(sa->set, 0, 0x7f);
3698 break; 3645 break;
3699 case 'k': 3646 case 'k':
3700 /* there is only one converter for KR, and it is not in the myConverterA rray[] */ 3647 /* there is only one converter for KR, and it is not in the myConverterA rray[] */
3701 cnvData->currentConverter->sharedData->impl->getUnicodeSet( 3648 cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3702 cnvData->currentConverter, sa, which, pErrorCode); 3649 cnvData->currentConverter, sa, which, pErrorCode);
3703 /* the loop over myConverterArray[] will simply not find another convert er */ 3650 /* the loop over myConverterArray[] will simply not find another convert er */
3704 break; 3651 break;
3705 #endif
3706 default: 3652 default:
3707 break; 3653 break;
3708 } 3654 }
3709 3655
3710 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem ent ucnv_getUnicodeSet() with reverse fallbacks. */ 3656 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem ent ucnv_getUnicodeSet() with reverse fallbacks. */
3711 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3657 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3712 cnvData->version==0 && i==CNS_11643 3658 cnvData->version==0 && i==CNS_11643
3713 ) { 3659 ) {
3714 /* special handling for non-EXT ISO-2022-CN: add only code point s for CNS planes 1 and 2 */ 3660 /* special handling for non-EXT ISO-2022-CN: add only code point s for CNS planes 1 and 2 */
3715 ucnv_MBCSGetUnicodeSetForBytes( 3661 ucnv_MBCSGetUnicodeSetForBytes(
3716 cnvData->myConverterArray[i], 3662 cnvData->myConverterArray[i],
3717 sa, UCNV_ROUNDTRIP_SET, 3663 sa, UCNV_ROUNDTRIP_SET,
3718 0, 0x81, 0x82, 3664 0, 0x81, 0x82,
3719 pErrorCode); 3665 pErrorCode);
3720 } 3666 }
3721 #endif 3667 #endif
3722 3668
3723 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 3669 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
3724 UConverterSetFilter filter; 3670 UConverterSetFilter filter;
3725 if(cnvData->myConverterArray[i]!=NULL) { 3671 if(cnvData->myConverterArray[i]!=NULL) {
3726 if(cnvData->locale[0]=='j' && i==JISX208) { 3672 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3727 /* 3673 cnvData->version==0 && i==CNS_11643
3728 * Only add code points that map to Shift-JIS codes 3674 ) {
3729 * corresponding to JIS X 0208.
3730 */
3731 filter=UCNV_SET_FILTER_SJIS;
3732 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3733 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3734 cnvData->version==0 && i==CNS_11643) {
3735 /* 3675 /*
3736 * Version-specific for CN: 3676 * Version-specific for CN:
3737 * CN version 0 does not map CNS planes 3..7 although 3677 * CN version 0 does not map CNS planes 3..7 although
3738 * they are all available in the CNS conversion table; 3678 * they are all available in the CNS conversion table;
3739 * CN version 1 (-EXT) does map them all. 3679 * CN version 1 (-EXT) does map them all.
3740 * The two versions create different Unicode sets. 3680 * The two versions create different Unicode sets.
3741 */ 3681 */
3742 filter=UCNV_SET_FILTER_2022_CN; 3682 filter=UCNV_SET_FILTER_2022_CN;
3683 } else if(cnvData->locale[0]=='j' && i==JISX208) {
3684 /*
3685 * Only add code points that map to Shift-JIS codes
3686 * corresponding to JIS X 0208.
3687 */
3688 filter=UCNV_SET_FILTER_SJIS;
3743 } else if(i==KSC5601) { 3689 } else if(i==KSC5601) {
3744 /* 3690 /*
3745 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables) 3691 * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
3746 * are broader than GR94. 3692 * are broader than GR94.
3747 */ 3693 */
3748 filter=UCNV_SET_FILTER_GR94DBCS; 3694 filter=UCNV_SET_FILTER_GR94DBCS;
3749 #endif
3750 } else { 3695 } else {
3751 filter=UCNV_SET_FILTER_NONE; 3696 filter=UCNV_SET_FILTER_NONE;
3752 } 3697 }
3753 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode); 3698 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode);
3754 } 3699 }
3755 } 3700 }
3756 3701
3757 /* 3702 /*
3758 * ISO 2022 converters must not convert SO/SI/ESC despite what 3703 * ISO 2022 converters must not convert SO/SI/ESC despite what
3759 * sub-converters do by themselves. 3704 * sub-converters do by themselves.
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
3877 NULL, 3822 NULL,
3878 NULL, 3823 NULL,
3879 &_ISO2022JPStaticData, 3824 &_ISO2022JPStaticData,
3880 FALSE, 3825 FALSE,
3881 &_ISO2022JPImpl, 3826 &_ISO2022JPImpl,
3882 0, UCNV_MBCS_TABLE_INITIALIZER 3827 0, UCNV_MBCS_TABLE_INITIALIZER
3883 }; 3828 };
3884 3829
3885 } // namespace 3830 } // namespace
3886 3831
3887 #if !UCONFIG_NO_NON_HTML5_CONVERSION
3888 /************* KR ***************/ 3832 /************* KR ***************/
3889 static const UConverterImpl _ISO2022KRImpl={ 3833 static const UConverterImpl _ISO2022KRImpl={
3890 UCNV_ISO_2022, 3834 UCNV_ISO_2022,
3891 3835
3892 NULL, 3836 NULL,
3893 NULL, 3837 NULL,
3894 3838
3895 _ISO2022Open, 3839 _ISO2022Open,
3896 _ISO2022Close, 3840 _ISO2022Close,
3897 _ISO2022Reset, 3841 _ISO2022Reset,
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
3994 ~((uint32_t) 0), 3938 ~((uint32_t) 0),
3995 NULL, 3939 NULL,
3996 NULL, 3940 NULL,
3997 &_ISO2022CNStaticData, 3941 &_ISO2022CNStaticData,
3998 FALSE, 3942 FALSE,
3999 &_ISO2022CNImpl, 3943 &_ISO2022CNImpl,
4000 0, UCNV_MBCS_TABLE_INITIALIZER 3944 0, UCNV_MBCS_TABLE_INITIALIZER
4001 }; 3945 };
4002 3946
4003 } // namespace 3947 } // namespace
4004 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
4005 3948
4006 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 3949 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
OLDNEW
« no previous file with comments | « source/common/ucmndata.h ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698