OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2000-2012, International Business Machines | 3 * Copyright (C) 2000-2012, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * file name: ucnv2022.cpp | 6 * file name: ucnv2022.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
147 CNS_11643_1, | 147 CNS_11643_1, |
148 CNS_11643_2, | 148 CNS_11643_2, |
149 CNS_11643_3, | 149 CNS_11643_3, |
150 CNS_11643_4, | 150 CNS_11643_4, |
151 CNS_11643_5, | 151 CNS_11643_5, |
152 CNS_11643_6, | 152 CNS_11643_6, |
153 CNS_11643_7 | 153 CNS_11643_7 |
154 } StateEnum; | 154 } StateEnum; |
155 | 155 |
156 /* is the StateEnum charset value for a DBCS charset? */ | 156 /* is the StateEnum charset value for a DBCS charset? */ |
| 157 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 158 #define IS_JP_DBCS(cs) (JISX208==(cs)) |
| 159 #else |
157 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) | 160 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
| 161 #endif |
158 | 162 |
159 #define CSM(cs) ((uint16_t)1<<(cs)) | 163 #define CSM(cs) ((uint16_t)1<<(cs)) |
160 | 164 |
161 /* | 165 /* |
162 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence | 166 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence |
163 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x | 167 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x |
164 * | 168 * |
165 * Note: The converter uses some leniency: | 169 * Note: The converter uses some leniency: |
166 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in | 170 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in |
167 * all versions, not just JIS7 and JIS8. | 171 * all versions, not just JIS7 and JIS8. |
168 * - ICU does not distinguish between different versions of JIS X 0208. | 172 * - ICU does not distinguish between different versions of JIS X 0208. |
169 */ | 173 */ |
170 #if UCONFIG_NO_NON_HTML5_CONVERSION | 174 #if UCONFIG_NO_NON_HTML5_CONVERSION |
171 enum { MAX_JA_VERSION=0 }; | 175 enum { MAX_JA_VERSION=0 }; |
172 #else | 176 #else |
173 enum { MAX_JA_VERSION=4 }; | 177 enum { MAX_JA_VERSION=4 }; |
174 #endif | 178 #endif |
175 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 179 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
| 180 /* |
| 181 * TODO(jshin): The encoding spec has JISX212, but we don't support it. |
| 182 * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 |
| 183 */ |
176 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 184 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
177 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 185 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
178 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 186 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
179 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 187 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
180 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 188 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
181 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 189 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
182 #endif | 190 #endif |
183 }; | 191 }; |
184 | 192 |
185 typedef enum { | 193 typedef enum { |
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
359 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_
2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 367 VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_
2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
360 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 368 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
361 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 | 369 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 |
362 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 370 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
363 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 371 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
364 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 372 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
365 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 373 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
366 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 | 374 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 |
367 }; | 375 }; |
368 | 376 |
369 | |
370 /* Enable ISO-2022-{KR,CN,CN-Ext} for now. | |
371 * TODO(jshin): Disable it when we know what to do about 'replacement' | |
372 * encodings. See http://crbug.com/277037 and | |
373 * https://codereview.chromium.org/145973021/ | |
374 */ | |
375 #ifndef U_ENABLE_ISO_2022_KR_CN | |
376 #define U_ENABLE_ISO_2022_KR_CN 1 | |
377 #endif | |
378 | |
379 /* Type def for refactoring changeState_2022 code*/ | 377 /* Type def for refactoring changeState_2022 code*/ |
380 typedef enum{ | 378 typedef enum{ |
381 #ifdef U_ENABLE_GENERIC_ISO_2022 | 379 #ifdef U_ENABLE_GENERIC_ISO_2022 |
382 ISO_2022=0, | 380 ISO_2022=0, |
383 #endif | 381 #endif |
| 382 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 383 ISO_2022_JP=1 |
| 384 #else |
384 ISO_2022_JP=1, | 385 ISO_2022_JP=1, |
385 #ifdef U_ENABLE_ISO_2022_KR_CN | |
386 ISO_2022_KR=2, | 386 ISO_2022_KR=2, |
387 ISO_2022_CN=3 | 387 ISO_2022_CN=3 |
388 #endif | 388 #endif |
389 } Variant2022; | 389 } Variant2022; |
390 | 390 |
391 /*********** ISO 2022 Converter Protos ***********/ | 391 /*********** ISO 2022 Converter Protos ***********/ |
392 static void | 392 static void |
393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); | 393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); |
394 | 394 |
395 static void | 395 static void |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
510 #endif | 510 #endif |
511 myConverterData->myConverterArray[JISX208] = | 511 myConverterData->myConverterArray[JISX208] = |
512 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); | 512 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); |
513 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 513 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
514 if(jpCharsetMasks[version]&CSM(JISX212)) { | 514 if(jpCharsetMasks[version]&CSM(JISX212)) { |
515 myConverterData->myConverterArray[JISX212] = | 515 myConverterData->myConverterArray[JISX212] = |
516 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); | 516 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); |
517 } | 517 } |
518 if(jpCharsetMasks[version]&CSM(GB2312)) { | 518 if(jpCharsetMasks[version]&CSM(GB2312)) { |
519 myConverterData->myConverterArray[GB2312] = | 519 myConverterData->myConverterArray[GB2312] = |
520 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackAr
gs, errorCode); /* gb_2312_80-1 */ | 520 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ |
521 } | 521 } |
522 if(jpCharsetMasks[version]&CSM(KSC5601)) { | 522 if(jpCharsetMasks[version]&CSM(KSC5601)) { |
523 myConverterData->myConverterArray[KSC5601] = | 523 myConverterData->myConverterArray[KSC5601] = |
524 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); | 524 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); |
525 } | 525 } |
526 #endif | 526 #endif |
527 | 527 |
528 /* set the function pointers to appropriate functions */ | 528 /* set the function pointers to appropriate functions */ |
529 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 529 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
530 uprv_strcpy(myConverterData->locale,"ja"); | 530 uprv_strcpy(myConverterData->locale,"ja"); |
531 | 531 |
532 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); | 532 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); |
533 len = uprv_strlen(myConverterData->name); | 533 len = uprv_strlen(myConverterData->name); |
534 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; | 534 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; |
535 myConverterData->name[len+1]='\0'; | 535 myConverterData->name[len+1]='\0'; |
536 } | 536 } |
537 #ifdef U_ENABLE_ISO_2022_KR_CN | 537 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
538 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 538 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
539 (myLocale[2]=='_' || myLocale[2]=='\0')) | 539 (myLocale[2]=='_' || myLocale[2]=='\0')) |
540 { | 540 { |
541 const char *cnvName; | 541 const char *cnvName; |
542 if(version==1) { | 542 if(version==1) { |
543 cnvName="icu-internal-25546"; | 543 cnvName="icu-internal-25546"; |
544 } else { | 544 } else { |
545 cnvName="ibm-949"; | 545 cnvName="ibm-949"; |
546 myConverterData->version=version=0; | 546 myConverterData->version=version=0; |
547 } | 547 } |
(...skipping 25 matching lines...) Expand all Loading... |
573 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; | 573 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; |
574 uprv_strcpy(myConverterData->locale,"ko"); | 574 uprv_strcpy(myConverterData->locale,"ko"); |
575 } | 575 } |
576 } | 576 } |
577 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m
yLocale[1]=='n'))&& | 577 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m
yLocale[1]=='n'))&& |
578 (myLocale[2]=='_' || myLocale[2]=='\0')) | 578 (myLocale[2]=='_' || myLocale[2]=='\0')) |
579 { | 579 { |
580 | 580 |
581 /* open the required converters and cache them */ | 581 /* open the required converters and cache them */ |
582 myConverterData->myConverterArray[GB2312_1] = | 582 myConverterData->myConverterArray[GB2312_1] = |
583 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs,
errorCode); | 583 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorC
ode); |
584 if(version==1) { | 584 if(version==1) { |
585 myConverterData->myConverterArray[ISO_IR_165] = | 585 myConverterData->myConverterArray[ISO_IR_165] = |
586 ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackA
rgs, errorCode); | 586 ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs,
errorCode); |
587 } | 587 } |
588 myConverterData->myConverterArray[CNS_11643] = | 588 myConverterData->myConverterArray[CNS_11643] = |
589 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs,
errorCode); | 589 ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs,
errorCode); |
590 | 590 |
591 | 591 |
592 /* set the function pointers to appropriate functions */ | 592 /* set the function pointers to appropriate functions */ |
593 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; | 593 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; |
594 uprv_strcpy(myConverterData->locale,"cn"); | 594 uprv_strcpy(myConverterData->locale,"cn"); |
595 | 595 |
596 if (version==0){ | 596 if (version==0){ |
597 myConverterData->version = 0; | 597 myConverterData->version = 0; |
598 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); | 598 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); |
599 }else if (version==1){ | 599 }else if (version==1){ |
600 myConverterData->version = 1; | 600 myConverterData->version = 1; |
601 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); | 601 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); |
602 }else { | 602 }else { |
603 myConverterData->version = 2; | 603 myConverterData->version = 2; |
604 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); | 604 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); |
605 } | 605 } |
606 } | 606 } |
607 #endif // U_ENABLE_ISO_2022_KR_CN | 607 #endif // !UCONFIG_NO_NON_HTML5_CONVERSION |
608 else{ | 608 else{ |
609 #ifdef U_ENABLE_GENERIC_ISO_2022 | 609 #ifdef U_ENABLE_GENERIC_ISO_2022 |
610 myConverterData->isFirstBuffer = TRUE; | 610 myConverterData->isFirstBuffer = TRUE; |
611 | 611 |
612 /* append the UTF-8 escape sequence */ | 612 /* append the UTF-8 escape sequence */ |
613 cnv->charErrorBufferLength = 3; | 613 cnv->charErrorBufferLength = 3; |
614 cnv->charErrorBuffer[0] = 0x1b; | 614 cnv->charErrorBuffer[0] = 0x1b; |
615 cnv->charErrorBuffer[1] = 0x25; | 615 cnv->charErrorBuffer[1] = 0x25; |
616 cnv->charErrorBuffer[2] = 0x42; | 616 cnv->charErrorBuffer[2] = 0x42; |
617 | 617 |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
732 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 732 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
733 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE | 733 ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STA
TE |
734 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 734 ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
735 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE | 735 ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STA
TE |
736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 736 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 737 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
738 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 738 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
739 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 739 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
740 }; | 740 }; |
741 | 741 |
| 742 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
742 /*************** to unicode *******************/ | 743 /*************** to unicode *******************/ |
743 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | 744 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
744 /* 0 1 2 3 4
5 6 7 8 9 */ | 745 /* 0 1 2 3 4
5 6 7 8 9 */ |
745 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 746 INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
746 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
747 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
748 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 749 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
749 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 | 750 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 |
750 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 751 ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5
,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
751 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE | 752 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STA
TE |
752 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 753 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
753 }; | 754 }; |
| 755 #endif |
754 | 756 |
755 | 757 |
756 static UCNV_TableStates_2022 | 758 static UCNV_TableStates_2022 |
757 getKey_2022(char c,int32_t* key,int32_t* offset){ | 759 getKey_2022(char c,int32_t* key,int32_t* offset){ |
758 int32_t togo; | 760 int32_t togo; |
759 int32_t low = 0; | 761 int32_t low = 0; |
760 int32_t hi = MAX_STATES_2022; | 762 int32_t hi = MAX_STATES_2022; |
761 int32_t oldmid=0; | 763 int32_t oldmid=0; |
762 | 764 |
763 togo = normalize_esq_chars_2022[(uint8_t)c]; | 765 togo = normalize_esq_chars_2022[(uint8_t)c]; |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
896 if(myData2022->toU2022State.g<2) { | 898 if(myData2022->toU2022State.g<2) { |
897 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; | 899 myData2022->toU2022State.prevG=myData2022->toU2022St
ate.g; |
898 } | 900 } |
899 myData2022->toU2022State.g=2; | 901 myData2022->toU2022State.g=2; |
900 } else { | 902 } else { |
901 /* illegal to have SS2 before a matching designator */ | 903 /* illegal to have SS2 before a matching designator */ |
902 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 904 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
903 } | 905 } |
904 break; | 906 break; |
905 /* case SS3_STATE: not used in ISO-2022-JP-x */ | 907 /* case SS3_STATE: not used in ISO-2022-JP-x */ |
| 908 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
906 case ISO8859_1: | 909 case ISO8859_1: |
907 case ISO8859_7: | 910 case ISO8859_7: |
908 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 911 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
909 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 912 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
910 } else { | 913 } else { |
911 /* G2 charset for SS2 */ | 914 /* G2 charset for SS2 */ |
912 myData2022->toU2022State.cs[2]=(int8_t)tempState; | 915 myData2022->toU2022State.cs[2]=(int8_t)tempState; |
913 } | 916 } |
914 break; | 917 break; |
| 918 #endif |
915 default: | 919 default: |
916 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { | 920 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) ==
0) { |
917 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 921 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
918 } else { | 922 } else { |
919 /* G0 charset */ | 923 /* G0 charset */ |
920 myData2022->toU2022State.cs[0]=(int8_t)tempState; | 924 myData2022->toU2022State.cs[0]=(int8_t)tempState; |
921 } | 925 } |
922 break; | 926 break; |
923 } | 927 } |
924 } | 928 } |
925 break; | 929 break; |
| 930 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
926 case ISO_2022_CN: | 931 case ISO_2022_CN: |
927 { | 932 { |
928 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; | 933 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
929 switch(tempState) { | 934 switch(tempState) { |
930 case INVALID_STATE: | 935 case INVALID_STATE: |
931 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 936 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
932 break; | 937 break; |
933 case SS2_STATE: | 938 case SS2_STATE: |
934 if(myData2022->toU2022State.cs[2]!=0) { | 939 if(myData2022->toU2022State.cs[2]!=0) { |
935 if(myData2022->toU2022State.g<2) { | 940 if(myData2022->toU2022State.g<2) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
977 } | 982 } |
978 } | 983 } |
979 break; | 984 break; |
980 case ISO_2022_KR: | 985 case ISO_2022_KR: |
981 if(offset==0x30){ | 986 if(offset==0x30){ |
982 /* nothing to be done, just accept this one escape sequence */ | 987 /* nothing to be done, just accept this one escape sequence */ |
983 } else { | 988 } else { |
984 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 989 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
985 } | 990 } |
986 break; | 991 break; |
| 992 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
987 | 993 |
988 default: | 994 default: |
989 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 995 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
990 break; | 996 break; |
991 } | 997 } |
992 } | 998 } |
993 if(U_SUCCESS(*err)) { | 999 if(U_SUCCESS(*err)) { |
994 _this->toULength = 0; | 1000 _this->toULength = 0; |
995 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { | 1001 } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { |
996 if(_this->toULength>1) { | 1002 if(_this->toULength>1) { |
(...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1397 * KSC5601 : alias to ibm-949 mapping table | 1403 * KSC5601 : alias to ibm-949 mapping table |
1398 * GB2312 : alias to ibm-1386 mapping table | 1404 * GB2312 : alias to ibm-1386 mapping table |
1399 * ISO-8859-1 : Algorithmic implemented as LATIN1 case | 1405 * ISO-8859-1 : Algorithmic implemented as LATIN1 case |
1400 * ISO-8859-7 : alias to ibm-9409 mapping table | 1406 * ISO-8859-7 : alias to ibm-9409 mapping table |
1401 */ | 1407 */ |
1402 | 1408 |
1403 /* preference order of JP charsets */ | 1409 /* preference order of JP charsets */ |
1404 static const StateEnum jpCharsetPref[]={ | 1410 static const StateEnum jpCharsetPref[]={ |
1405 ASCII, | 1411 ASCII, |
1406 JISX201, | 1412 JISX201, |
| 1413 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1407 ISO8859_1, | 1414 ISO8859_1, |
1408 ISO8859_7, | 1415 ISO8859_7, |
| 1416 #endif |
1409 JISX208, | 1417 JISX208, |
| 1418 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1410 JISX212, | 1419 JISX212, |
1411 GB2312, | 1420 GB2312, |
1412 KSC5601, | 1421 KSC5601, |
| 1422 #endif |
1413 HWKANA_7BIT | 1423 HWKANA_7BIT |
1414 }; | 1424 }; |
1415 | 1425 |
1416 /* | 1426 /* |
1417 * The escape sequences must be in order of the enum constants like JISX201 = 3, | 1427 * The escape sequences must be in order of the enum constants like JISX201 = 3, |
1418 * not in order of jpCharsetPref[]! | 1428 * not in order of jpCharsetPref[]! |
1419 */ | 1429 */ |
1420 static const char escSeqChars[][6] ={ | 1430 static const char escSeqChars[][6] ={ |
1421 "\x1B\x28\x42", /* <ESC>(B ASCII */ | 1431 "\x1B\x28\x42", /* <ESC>(B ASCII */ |
1422 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ | 1432 "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ |
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1772 int8_t cs0 = choices[i]; | 1782 int8_t cs0 = choices[i]; |
1773 switch(cs0) { | 1783 switch(cs0) { |
1774 case ASCII: | 1784 case ASCII: |
1775 if(sourceChar <= 0x7f) { | 1785 if(sourceChar <= 0x7f) { |
1776 targetValue = (uint32_t)sourceChar; | 1786 targetValue = (uint32_t)sourceChar; |
1777 len = 1; | 1787 len = 1; |
1778 cs = cs0; | 1788 cs = cs0; |
1779 g = 0; | 1789 g = 0; |
1780 } | 1790 } |
1781 break; | 1791 break; |
| 1792 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1782 case ISO8859_1: | 1793 case ISO8859_1: |
1783 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { | 1794 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
1784 targetValue = (uint32_t)sourceChar - 0x80; | 1795 targetValue = (uint32_t)sourceChar - 0x80; |
1785 len = 1; | 1796 len = 1; |
1786 cs = cs0; | 1797 cs = cs0; |
1787 g = 2; | 1798 g = 2; |
1788 } | 1799 } |
1789 break; | 1800 break; |
| 1801 #endif |
1790 case HWKANA_7BIT: | 1802 case HWKANA_7BIT: |
1791 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { | 1803 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HW
KANA_START)) { |
1792 if(converterData->version==3) { | 1804 if(converterData->version==3) { |
1793 /* JIS7: use G1 (SO) */ | 1805 /* JIS7: use G1 (SO) */ |
1794 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ | 1806 /* Shift U+FF61..U+FF9F to bytes 21..5F. */ |
1795 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); | 1807 targetValue = (uint32_t)(sourceChar - (HWKANA_START
- 0x21)); |
1796 len = 1; | 1808 len = 1; |
1797 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ | 1809 pFromU2022State->cs[1] = cs = cs0; /* do not output
an escape sequence */ |
1798 g = 1; | 1810 g = 1; |
1799 } else if(converterData->version==4) { | 1811 } else if(converterData->version==4) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1841 } | 1853 } |
1842 } else if(len == 0 && useFallback && | 1854 } else if(len == 0 && useFallback && |
1843 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { | 1855 (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_E
ND - HWKANA_START)) { |
1844 targetValue = hwkana_fb[sourceChar - HWKANA_START]; | 1856 targetValue = hwkana_fb[sourceChar - HWKANA_START]; |
1845 len = -2; | 1857 len = -2; |
1846 cs = cs0; | 1858 cs = cs0; |
1847 g = 0; | 1859 g = 0; |
1848 useFallback = FALSE; | 1860 useFallback = FALSE; |
1849 } | 1861 } |
1850 break; | 1862 break; |
| 1863 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1851 case ISO8859_7: | 1864 case ISO8859_7: |
1852 /* G0 SBCS forced to 7-bit output */ | 1865 /* G0 SBCS forced to 7-bit output */ |
1853 len2 = MBCS_SINGLE_FROM_UCHAR32( | 1866 len2 = MBCS_SINGLE_FROM_UCHAR32( |
1854 converterData->myConverterArray[cs0], | 1867 converterData->myConverterArray[cs0], |
1855 sourceChar, &value, | 1868 sourceChar, &value, |
1856 useFallback); | 1869 useFallback); |
1857 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { | 1870 if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= val
ue && value <= GR96_END) { |
1858 targetValue = value - 0x80; | 1871 targetValue = value - 0x80; |
1859 len = len2; | 1872 len = len2; |
1860 cs = cs0; | 1873 cs = cs0; |
1861 g = 2; | 1874 g = 2; |
1862 useFallback = FALSE; | 1875 useFallback = FALSE; |
1863 } | 1876 } |
1864 break; | 1877 break; |
| 1878 #endif |
1865 default: | 1879 default: |
1866 /* G0 DBCS */ | 1880 /* G0 DBCS */ |
1867 len2 = MBCS_FROM_UCHAR32_ISO2022( | 1881 len2 = MBCS_FROM_UCHAR32_ISO2022( |
1868 converterData->myConverterArray[cs0], | 1882 converterData->myConverterArray[cs0], |
1869 sourceChar, &value, | 1883 sourceChar, &value, |
1870 useFallback, MBCS_OUTPUT_2); | 1884 useFallback, MBCS_OUTPUT_2); |
1871 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ | 1885 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ |
| 1886 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
1872 if(cs0 == KSC5601) { | 1887 if(cs0 == KSC5601) { |
1873 /* | 1888 /* |
1874 * Check for valid bytes for the encoding scheme. | 1889 * Check for valid bytes for the encoding scheme. |
1875 * This is necessary because the sub-converter (windows-949) | 1890 * This is necessary because the sub-converter (windows-949) |
1876 * has a broader encoding scheme than is valid for 2022. | 1891 * has a broader encoding scheme than is valid for 2022. |
1877 */ | 1892 */ |
1878 value = _2022FromGR94DBCS(value); | 1893 value = _2022FromGR94DBCS(value); |
1879 if(value == 0) { | 1894 if(value == 0) { |
1880 break; | 1895 break; |
1881 } | 1896 } |
1882 } | 1897 } |
| 1898 #endif |
1883 targetValue = value; | 1899 targetValue = value; |
1884 len = len2; | 1900 len = len2; |
1885 cs = cs0; | 1901 cs = cs0; |
1886 g = 0; | 1902 g = 0; |
1887 useFallback = FALSE; | 1903 useFallback = FALSE; |
1888 } | 1904 } |
1889 break; | 1905 break; |
1890 } | 1906 } |
1891 } | 1907 } |
1892 | 1908 |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2166 /* return from a single-shift state to the previous one */ | 2182 /* return from a single-shift state to the previous one */ |
2167 if(pToU2022State->g >= 2) { | 2183 if(pToU2022State->g >= 2) { |
2168 pToU2022State->g=pToU2022State->prevG; | 2184 pToU2022State->g=pToU2022State->prevG; |
2169 } | 2185 } |
2170 } else switch(cs) { | 2186 } else switch(cs) { |
2171 case ASCII: | 2187 case ASCII: |
2172 if(mySourceChar <= 0x7f) { | 2188 if(mySourceChar <= 0x7f) { |
2173 targetUniChar = mySourceChar; | 2189 targetUniChar = mySourceChar; |
2174 } | 2190 } |
2175 break; | 2191 break; |
| 2192 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
2176 case ISO8859_1: | 2193 case ISO8859_1: |
2177 if(mySourceChar <= 0x7f) { | 2194 if(mySourceChar <= 0x7f) { |
2178 targetUniChar = mySourceChar + 0x80; | 2195 targetUniChar = mySourceChar + 0x80; |
2179 } | 2196 } |
2180 /* return from a single-shift state to the previous one */ | 2197 /* return from a single-shift state to the previous one */ |
2181 pToU2022State->g=pToU2022State->prevG; | 2198 pToU2022State->g=pToU2022State->prevG; |
2182 break; | 2199 break; |
2183 case ISO8859_7: | 2200 case ISO8859_7: |
2184 if(mySourceChar <= 0x7f) { | 2201 if(mySourceChar <= 0x7f) { |
2185 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ | 2202 /* convert mySourceChar+0x80 to use a normal 8-bit table
*/ |
2186 targetUniChar = | 2203 targetUniChar = |
2187 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( | 2204 _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( |
2188 myData->myConverterArray[cs], | 2205 myData->myConverterArray[cs], |
2189 mySourceChar + 0x80); | 2206 mySourceChar + 0x80); |
2190 } | 2207 } |
2191 /* return from a single-shift state to the previous one */ | 2208 /* return from a single-shift state to the previous one */ |
2192 pToU2022State->g=pToU2022State->prevG; | 2209 pToU2022State->g=pToU2022State->prevG; |
2193 break; | 2210 break; |
| 2211 #endif |
2194 case JISX201: | 2212 case JISX201: |
2195 if(mySourceChar <= 0x7f) { | 2213 if(mySourceChar <= 0x7f) { |
2196 targetUniChar = jisx201ToU(mySourceChar); | 2214 targetUniChar = jisx201ToU(mySourceChar); |
2197 } | 2215 } |
2198 break; | 2216 break; |
2199 case HWKANA_7BIT: | 2217 case HWKANA_7BIT: |
2200 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { | 2218 if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { |
2201 /* 7-bit halfwidth Katakana */ | 2219 /* 7-bit halfwidth Katakana */ |
2202 targetUniChar = mySourceChar + (HWKANA_START - 0x21); | 2220 targetUniChar = mySourceChar + (HWKANA_START - 0x21); |
2203 } | 2221 } |
(...skipping 19 matching lines...) Expand all Loading... |
2223 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; | 2241 trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21)
; |
2224 if (leadIsOk && trailIsOk) { | 2242 if (leadIsOk && trailIsOk) { |
2225 ++mySource; | 2243 ++mySource; |
2226 tmpSourceChar = (mySourceChar << 8) | trailByte; | 2244 tmpSourceChar = (mySourceChar << 8) | trailByte; |
2227 if(cs == JISX208) { | 2245 if(cs == JISX208) { |
2228 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); | 2246 _2022ToSJIS((uint8_t)mySourceChar, trailByte, te
mpBuf); |
2229 mySourceChar = tmpSourceChar; | 2247 mySourceChar = tmpSourceChar; |
2230 } else { | 2248 } else { |
2231 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ | 2249 /* Copy before we modify tmpSourceChar so toUnic
odeCallback() sees the correct bytes. */ |
2232 mySourceChar = tmpSourceChar; | 2250 mySourceChar = tmpSourceChar; |
| 2251 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
2233 if (cs == KSC5601) { | 2252 if (cs == KSC5601) { |
2234 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ | 2253 tmpSourceChar += 0x8080; /* = _2022ToGR94DB
CS(tmpSourceChar) */ |
2235 } | 2254 } |
| 2255 #endif |
2236 tempBuf[0] = (char)(tmpSourceChar >> 8); | 2256 tempBuf[0] = (char)(tmpSourceChar >> 8); |
2237 tempBuf[1] = (char)(tmpSourceChar); | 2257 tempBuf[1] = (char)(tmpSourceChar); |
2238 } | 2258 } |
2239 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); | 2259 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->
myConverterArray[cs], tempBuf, 2, FALSE); |
2240 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { | 2260 } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { |
2241 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ | 2261 /* report a pair of illegal bytes if the second byte
is not a DBCS starter */ |
2242 ++mySource; | 2262 ++mySource; |
2243 /* add another bit so that the code below writes 2 b
ytes in case of error */ | 2263 /* add another bit so that the code below writes 2 b
ytes in case of error */ |
2244 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; | 2264 mySourceChar = 0x10000 | (mySourceChar << 8) | trail
Byte; |
2245 } | 2265 } |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2287 *err =U_BUFFER_OVERFLOW_ERROR; | 2307 *err =U_BUFFER_OVERFLOW_ERROR; |
2288 break; | 2308 break; |
2289 } | 2309 } |
2290 } | 2310 } |
2291 endloop: | 2311 endloop: |
2292 args->target = myTarget; | 2312 args->target = myTarget; |
2293 args->source = mySource; | 2313 args->source = mySource; |
2294 } | 2314 } |
2295 | 2315 |
2296 | 2316 |
| 2317 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
2297 /*************************************************************** | 2318 /*************************************************************** |
2298 * Rules for ISO-2022-KR encoding | 2319 * Rules for ISO-2022-KR encoding |
2299 * i) The KSC5601 designator sequence should appear only once in a file, | 2320 * i) The KSC5601 designator sequence should appear only once in a file, |
2300 * at the beginning of a line before any KSC5601 characters. This usually | 2321 * at the beginning of a line before any KSC5601 characters. This usually |
2301 * means that it appears by itself on the first line of the file | 2322 * means that it appears by itself on the first line of the file |
2302 * ii) There are only 2 shifting sequences SO to shift into double byte mode | 2323 * ii) There are only 2 shifting sequences SO to shift into double byte mode |
2303 * and SI to shift into single byte mode | 2324 * and SI to shift into single byte mode |
2304 */ | 2325 */ |
2305 static void | 2326 static void |
2306 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ | 2327 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
args, UErrorCode* err){ |
(...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3430 } | 3451 } |
3431 else{ | 3452 else{ |
3432 *err =U_BUFFER_OVERFLOW_ERROR; | 3453 *err =U_BUFFER_OVERFLOW_ERROR; |
3433 break; | 3454 break; |
3434 } | 3455 } |
3435 } | 3456 } |
3436 endloop: | 3457 endloop: |
3437 args->target = myTarget; | 3458 args->target = myTarget; |
3438 args->source = mySource; | 3459 args->source = mySource; |
3439 } | 3460 } |
| 3461 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
3440 | 3462 |
3441 static void | 3463 static void |
3442 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { | 3464 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
ode *err) { |
3443 UConverter *cnv = args->converter; | 3465 UConverter *cnv = args->converter; |
3444 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; | 3466 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraI
nfo; |
3445 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; | 3467 ISO2022State *pFromU2022State=&myConverterData->fromU2022State; |
3446 char *p, *subchar; | 3468 char *p, *subchar; |
3447 char buffer[8]; | 3469 char buffer[8]; |
3448 int32_t length; | 3470 int32_t length; |
3449 | 3471 |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3631 #endif | 3653 #endif |
3632 | 3654 |
3633 cnvData = (UConverterDataISO2022*)cnv->extraInfo; | 3655 cnvData = (UConverterDataISO2022*)cnv->extraInfo; |
3634 | 3656 |
3635 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ | 3657 /* open a set and initialize it with code points that are algorithmically ro
und-tripped */ |
3636 switch(cnvData->locale[0]){ | 3658 switch(cnvData->locale[0]){ |
3637 case 'j': | 3659 case 'j': |
3638 /* include JIS X 0201 which is hardcoded */ | 3660 /* include JIS X 0201 which is hardcoded */ |
3639 sa->add(sa->set, 0xa5); | 3661 sa->add(sa->set, 0xa5); |
3640 sa->add(sa->set, 0x203e); | 3662 sa->add(sa->set, 0x203e); |
| 3663 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
3641 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | 3664 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
3642 /* include Latin-1 for some variants of JP */ | 3665 /* include Latin-1 for some variants of JP */ |
3643 sa->addRange(sa->set, 0, 0xff); | 3666 sa->addRange(sa->set, 0, 0xff); |
3644 } else { | 3667 } else { |
3645 /* include ASCII for JP */ | 3668 /* include ASCII for JP */ |
3646 sa->addRange(sa->set, 0, 0x7f); | 3669 sa->addRange(sa->set, 0, 0x7f); |
3647 } | 3670 } |
| 3671 #else |
| 3672 /* include ASCII for JP */ |
| 3673 sa->addRange(sa->set, 0, 0x7f); |
| 3674 #endif |
3648 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { | 3675 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { |
3649 /* | 3676 /* |
3650 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 | 3677 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 |
3651 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) | 3678 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) |
3652 * use half-width Katakana. | 3679 * use half-width Katakana. |
3653 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) | 3680 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) |
3654 * half-width Katakana via the ESC ( I sequence. | 3681 * half-width Katakana via the ESC ( I sequence. |
3655 * However, we only emit (fromUnicode) half-width Katakana according
to the | 3682 * However, we only emit (fromUnicode) half-width Katakana according
to the |
3656 * definition of each variant. | 3683 * definition of each variant. |
3657 * | 3684 * |
3658 * When including fallbacks, | 3685 * When including fallbacks, |
3659 * we need to include half-width Katakana Unicode code points for al
l JP variants because | 3686 * we need to include half-width Katakana Unicode code points for al
l JP variants because |
3660 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). | 3687 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). |
3661 */ | 3688 */ |
3662 /* include half-width Katakana for JP */ | 3689 /* include half-width Katakana for JP */ |
3663 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | 3690 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
3664 } | 3691 } |
3665 break; | 3692 break; |
| 3693 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
3666 case 'c': | 3694 case 'c': |
3667 case 'z': | 3695 case 'z': |
3668 /* include ASCII for CN */ | 3696 /* include ASCII for CN */ |
3669 sa->addRange(sa->set, 0, 0x7f); | 3697 sa->addRange(sa->set, 0, 0x7f); |
3670 break; | 3698 break; |
3671 case 'k': | 3699 case 'k': |
3672 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ | 3700 /* there is only one converter for KR, and it is not in the myConverterA
rray[] */ |
3673 cnvData->currentConverter->sharedData->impl->getUnicodeSet( | 3701 cnvData->currentConverter->sharedData->impl->getUnicodeSet( |
3674 cnvData->currentConverter, sa, which, pErrorCode); | 3702 cnvData->currentConverter, sa, which, pErrorCode); |
3675 /* the loop over myConverterArray[] will simply not find another convert
er */ | 3703 /* the loop over myConverterArray[] will simply not find another convert
er */ |
3676 break; | 3704 break; |
| 3705 #endif |
3677 default: | 3706 default: |
3678 break; | 3707 break; |
3679 } | 3708 } |
3680 | 3709 |
3681 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem
ent ucnv_getUnicodeSet() with reverse fallbacks. */ | 3710 #if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implem
ent ucnv_getUnicodeSet() with reverse fallbacks. */ |
3682 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 3711 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
3683 cnvData->version==0 && i==CNS_11643 | 3712 cnvData->version==0 && i==CNS_11643 |
3684 ) { | 3713 ) { |
3685 /* special handling for non-EXT ISO-2022-CN: add only code point
s for CNS planes 1 and 2 */ | 3714 /* special handling for non-EXT ISO-2022-CN: add only code point
s for CNS planes 1 and 2 */ |
3686 ucnv_MBCSGetUnicodeSetForBytes( | 3715 ucnv_MBCSGetUnicodeSetForBytes( |
3687 cnvData->myConverterArray[i], | 3716 cnvData->myConverterArray[i], |
3688 sa, UCNV_ROUNDTRIP_SET, | 3717 sa, UCNV_ROUNDTRIP_SET, |
3689 0, 0x81, 0x82, | 3718 0, 0x81, 0x82, |
3690 pErrorCode); | 3719 pErrorCode); |
3691 } | 3720 } |
3692 #endif | 3721 #endif |
3693 | 3722 |
3694 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { | 3723 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
3695 UConverterSetFilter filter; | 3724 UConverterSetFilter filter; |
3696 if(cnvData->myConverterArray[i]!=NULL) { | 3725 if(cnvData->myConverterArray[i]!=NULL) { |
3697 if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 3726 if(cnvData->locale[0]=='j' && i==JISX208) { |
3698 cnvData->version==0 && i==CNS_11643 | 3727 /* |
3699 ) { | 3728 * Only add code points that map to Shift-JIS codes |
| 3729 * corresponding to JIS X 0208. |
| 3730 */ |
| 3731 filter=UCNV_SET_FILTER_SJIS; |
| 3732 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 3733 } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
| 3734 cnvData->version==0 && i==CNS_11643) { |
3700 /* | 3735 /* |
3701 * Version-specific for CN: | 3736 * Version-specific for CN: |
3702 * CN version 0 does not map CNS planes 3..7 although | 3737 * CN version 0 does not map CNS planes 3..7 although |
3703 * they are all available in the CNS conversion table; | 3738 * they are all available in the CNS conversion table; |
3704 * CN version 1 (-EXT) does map them all. | 3739 * CN version 1 (-EXT) does map them all. |
3705 * The two versions create different Unicode sets. | 3740 * The two versions create different Unicode sets. |
3706 */ | 3741 */ |
3707 filter=UCNV_SET_FILTER_2022_CN; | 3742 filter=UCNV_SET_FILTER_2022_CN; |
3708 } else if(cnvData->locale[0]=='j' && i==JISX208) { | |
3709 /* | |
3710 * Only add code points that map to Shift-JIS codes | |
3711 * corresponding to JIS X 0208. | |
3712 */ | |
3713 filter=UCNV_SET_FILTER_SJIS; | |
3714 } else if(i==KSC5601) { | 3743 } else if(i==KSC5601) { |
3715 /* | 3744 /* |
3716 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on
multiple tables) | 3745 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on
multiple tables) |
3717 * are broader than GR94. | 3746 * are broader than GR94. |
3718 */ | 3747 */ |
3719 filter=UCNV_SET_FILTER_GR94DBCS; | 3748 filter=UCNV_SET_FILTER_GR94DBCS; |
| 3749 #endif |
3720 } else { | 3750 } else { |
3721 filter=UCNV_SET_FILTER_NONE; | 3751 filter=UCNV_SET_FILTER_NONE; |
3722 } | 3752 } |
3723 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); | 3753 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); |
3724 } | 3754 } |
3725 } | 3755 } |
3726 | 3756 |
3727 /* | 3757 /* |
3728 * ISO 2022 converters must not convert SO/SI/ESC despite what | 3758 * ISO 2022 converters must not convert SO/SI/ESC despite what |
3729 * sub-converters do by themselves. | 3759 * sub-converters do by themselves. |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3847 NULL, | 3877 NULL, |
3848 NULL, | 3878 NULL, |
3849 &_ISO2022JPStaticData, | 3879 &_ISO2022JPStaticData, |
3850 FALSE, | 3880 FALSE, |
3851 &_ISO2022JPImpl, | 3881 &_ISO2022JPImpl, |
3852 0, UCNV_MBCS_TABLE_INITIALIZER | 3882 0, UCNV_MBCS_TABLE_INITIALIZER |
3853 }; | 3883 }; |
3854 | 3884 |
3855 } // namespace | 3885 } // namespace |
3856 | 3886 |
| 3887 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
3857 /************* KR ***************/ | 3888 /************* KR ***************/ |
3858 static const UConverterImpl _ISO2022KRImpl={ | 3889 static const UConverterImpl _ISO2022KRImpl={ |
3859 UCNV_ISO_2022, | 3890 UCNV_ISO_2022, |
3860 | 3891 |
3861 NULL, | 3892 NULL, |
3862 NULL, | 3893 NULL, |
3863 | 3894 |
3864 _ISO2022Open, | 3895 _ISO2022Open, |
3865 _ISO2022Close, | 3896 _ISO2022Close, |
3866 _ISO2022Reset, | 3897 _ISO2022Reset, |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3963 ~((uint32_t) 0), | 3994 ~((uint32_t) 0), |
3964 NULL, | 3995 NULL, |
3965 NULL, | 3996 NULL, |
3966 &_ISO2022CNStaticData, | 3997 &_ISO2022CNStaticData, |
3967 FALSE, | 3998 FALSE, |
3968 &_ISO2022CNImpl, | 3999 &_ISO2022CNImpl, |
3969 0, UCNV_MBCS_TABLE_INITIALIZER | 4000 0, UCNV_MBCS_TABLE_INITIALIZER |
3970 }; | 4001 }; |
3971 | 4002 |
3972 } // namespace | 4003 } // namespace |
| 4004 #endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
3973 | 4005 |
3974 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 4006 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
OLD | NEW |