| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2000-2012, International Business Machines | 3 * Copyright (C) 2000-2012, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 * file name: ucnv2022.cpp | 6 * file name: ucnv2022.cpp |
| 7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
| 9 * indentation:4 | 9 * indentation:4 |
| 10 * | 10 * |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 160 | 160 |
| 161 /* | 161 /* |
| 162 * Each of these charset masks (with index x) contains a bit for a charset in ex
act correspondence | 162 * Each of these charset masks (with index x) contains a bit for a charset in ex
act correspondence |
| 163 * to whether that charset is used in the corresponding version x of ISO_2022,lo
cale=ja,version=x | 163 * to whether that charset is used in the corresponding version x of ISO_2022,lo
cale=ja,version=x |
| 164 * | 164 * |
| 165 * Note: The converter uses some leniency: | 165 * Note: The converter uses some leniency: |
| 166 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in | 166 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in |
| 167 * all versions, not just JIS7 and JIS8. | 167 * all versions, not just JIS7 and JIS8. |
| 168 * - ICU does not distinguish between different versions of JIS X 0208. | 168 * - ICU does not distinguish between different versions of JIS X 0208. |
| 169 */ | 169 */ |
| 170 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 171 enum { MAX_JA_VERSION=0 }; |
| 172 #else |
| 170 enum { MAX_JA_VERSION=4 }; | 173 enum { MAX_JA_VERSION=4 }; |
| 174 #endif |
| 171 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 175 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
| 172 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 176 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
| 177 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 173 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 178 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
| 174 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 179 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
| 175 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 180 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
| 176 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 181 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB231
2)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
| 182 #endif |
| 177 }; | 183 }; |
| 178 | 184 |
| 179 typedef enum { | 185 typedef enum { |
| 180 ASCII1=0, | 186 ASCII1=0, |
| 181 LATIN1, | 187 LATIN1, |
| 182 SBCS, | 188 SBCS, |
| 183 DBCS, | 189 DBCS, |
| 184 MBCS, | 190 MBCS, |
| 185 HWKANA | 191 HWKANA |
| 186 }Cnv2022Type; | 192 }Cnv2022Type; |
| (...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 354 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 360 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
| 355 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 | 361 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMI
NAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_T
ERMINAL_2022 ,VALID_TERMINAL_2022 |
| 356 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 362 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
| 357 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 363 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
| 358 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 364 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
| 359 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 | 365 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_
2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMI
NAL_2022 ,VALID_TERMINAL_2022 |
| 360 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 | 366 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 |
| 361 }; | 367 }; |
| 362 | 368 |
| 363 | 369 |
| 370 /* Enable ISO-2022-{KR,CN,CN-Ext} for now. |
| 371 * TODO(jshin): Disable it when we know what to do about 'replacement' |
| 372 * encodings. See http://crbug.com/277037 and |
| 373 * https://codereview.chromium.org/145973021/ |
| 374 */ |
| 375 #ifndef U_ENABLE_ISO_2022_KR_CN |
| 376 #define U_ENABLE_ISO_2022_KR_CN 1 |
| 377 #endif |
| 378 |
| 364 /* Type def for refactoring changeState_2022 code*/ | 379 /* Type def for refactoring changeState_2022 code*/ |
| 365 typedef enum{ | 380 typedef enum{ |
| 366 #ifdef U_ENABLE_GENERIC_ISO_2022 | 381 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 367 ISO_2022=0, | 382 ISO_2022=0, |
| 368 #endif | 383 #endif |
| 369 ISO_2022_JP=1, | 384 ISO_2022_JP=1, |
| 385 #ifdef U_ENABLE_ISO_2022_KR_CN |
| 370 ISO_2022_KR=2, | 386 ISO_2022_KR=2, |
| 371 ISO_2022_CN=3 | 387 ISO_2022_CN=3 |
| 388 #endif |
| 372 } Variant2022; | 389 } Variant2022; |
| 373 | 390 |
| 374 /*********** ISO 2022 Converter Protos ***********/ | 391 /*********** ISO 2022 Converter Protos ***********/ |
| 375 static void | 392 static void |
| 376 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); | 393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); |
| 377 | 394 |
| 378 static void | 395 static void |
| 379 _ISO2022Close(UConverter *converter); | 396 _ISO2022Close(UConverter *converter); |
| 380 | 397 |
| 381 static void | 398 static void |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 478 myConverterData->version = version; | 495 myConverterData->version = version; |
| 479 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && | 496 if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && |
| 480 (myLocale[2]=='_' || myLocale[2]=='\0')) | 497 (myLocale[2]=='_' || myLocale[2]=='\0')) |
| 481 { | 498 { |
| 482 size_t len=0; | 499 size_t len=0; |
| 483 /* open the required converters and cache them */ | 500 /* open the required converters and cache them */ |
| 484 if(version>MAX_JA_VERSION) { | 501 if(version>MAX_JA_VERSION) { |
| 485 /* prevent indexing beyond jpCharsetMasks[] */ | 502 /* prevent indexing beyond jpCharsetMasks[] */ |
| 486 myConverterData->version = version = 0; | 503 myConverterData->version = version = 0; |
| 487 } | 504 } |
| 505 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 488 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | 506 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
| 489 myConverterData->myConverterArray[ISO8859_7] = | 507 myConverterData->myConverterArray[ISO8859_7] = |
| 490 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); | 508 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, e
rrorCode); |
| 491 } | 509 } |
| 510 #endif |
| 492 myConverterData->myConverterArray[JISX208] = | 511 myConverterData->myConverterArray[JISX208] = |
| 493 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); | 512 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, error
Code); |
| 513 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 494 if(jpCharsetMasks[version]&CSM(JISX212)) { | 514 if(jpCharsetMasks[version]&CSM(JISX212)) { |
| 495 myConverterData->myConverterArray[JISX212] = | 515 myConverterData->myConverterArray[JISX212] = |
| 496 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); | 516 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, er
rorCode); |
| 497 } | 517 } |
| 498 if(jpCharsetMasks[version]&CSM(GB2312)) { | 518 if(jpCharsetMasks[version]&CSM(GB2312)) { |
| 499 myConverterData->myConverterArray[GB2312] = | 519 myConverterData->myConverterArray[GB2312] = |
| 500 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, er
rorCode); /* gb_2312_80-1 */ | 520 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackAr
gs, errorCode); /* gb_2312_80-1 */ |
| 501 } | 521 } |
| 502 if(jpCharsetMasks[version]&CSM(KSC5601)) { | 522 if(jpCharsetMasks[version]&CSM(KSC5601)) { |
| 503 myConverterData->myConverterArray[KSC5601] = | 523 myConverterData->myConverterArray[KSC5601] = |
| 504 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); | 524 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, er
rorCode); |
| 505 } | 525 } |
| 526 #endif |
| 506 | 527 |
| 507 /* set the function pointers to appropriate funtions */ | 528 /* set the function pointers to appropriate funtions */ |
| 508 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 529 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
| 509 uprv_strcpy(myConverterData->locale,"ja"); | 530 uprv_strcpy(myConverterData->locale,"ja"); |
| 510 | 531 |
| 511 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); | 532 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=
"); |
| 512 len = uprv_strlen(myConverterData->name); | 533 len = uprv_strlen(myConverterData->name); |
| 513 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; | 534 myConverterData->name[len]=(char)(myConverterData->version+(int)'0')
; |
| 514 myConverterData->name[len+1]='\0'; | 535 myConverterData->name[len+1]='\0'; |
| 515 } | 536 } |
| 537 #ifdef U_ENABLE_ISO_2022_KR_CN |
| 516 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 538 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
| 517 (myLocale[2]=='_' || myLocale[2]=='\0')) | 539 (myLocale[2]=='_' || myLocale[2]=='\0')) |
| 518 { | 540 { |
| 519 const char *cnvName; | 541 const char *cnvName; |
| 520 if(version==1) { | 542 if(version==1) { |
| 521 cnvName="icu-internal-25546"; | 543 cnvName="icu-internal-25546"; |
| 522 } else { | 544 } else { |
| 523 cnvName="ibm-949"; | 545 cnvName="ibm-949"; |
| 524 myConverterData->version=version=0; | 546 myConverterData->version=version=0; |
| 525 } | 547 } |
| (...skipping 25 matching lines...) Expand all Loading... |
| 551 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; | 573 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; |
| 552 uprv_strcpy(myConverterData->locale,"ko"); | 574 uprv_strcpy(myConverterData->locale,"ko"); |
| 553 } | 575 } |
| 554 } | 576 } |
| 555 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m
yLocale[1]=='n'))&& | 577 else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& m
yLocale[1]=='n'))&& |
| 556 (myLocale[2]=='_' || myLocale[2]=='\0')) | 578 (myLocale[2]=='_' || myLocale[2]=='\0')) |
| 557 { | 579 { |
| 558 | 580 |
| 559 /* open the required converters and cache them */ | 581 /* open the required converters and cache them */ |
| 560 myConverterData->myConverterArray[GB2312_1] = | 582 myConverterData->myConverterArray[GB2312_1] = |
| 561 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorC
ode); | 583 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs,
errorCode); |
| 562 if(version==1) { | 584 if(version==1) { |
| 563 myConverterData->myConverterArray[ISO_IR_165] = | 585 myConverterData->myConverterArray[ISO_IR_165] = |
| 564 ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs,
errorCode); | 586 ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackA
rgs, errorCode); |
| 565 } | 587 } |
| 566 myConverterData->myConverterArray[CNS_11643] = | 588 myConverterData->myConverterArray[CNS_11643] = |
| 567 ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs,
errorCode); | 589 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs,
errorCode); |
| 568 | 590 |
| 569 | 591 |
| 570 /* set the function pointers to appropriate funtions */ | 592 /* set the function pointers to appropriate funtions */ |
| 571 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; | 593 cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; |
| 572 uprv_strcpy(myConverterData->locale,"cn"); | 594 uprv_strcpy(myConverterData->locale,"cn"); |
| 573 | 595 |
| 574 if (version==0){ | 596 if (version==0){ |
| 575 myConverterData->version = 0; | 597 myConverterData->version = 0; |
| 576 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); | 598 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=0"); |
| 577 }else if (version==1){ | 599 }else if (version==1){ |
| 578 myConverterData->version = 1; | 600 myConverterData->version = 1; |
| 579 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); | 601 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=1"); |
| 580 }else { | 602 }else { |
| 581 myConverterData->version = 2; | 603 myConverterData->version = 2; |
| 582 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); | 604 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,vers
ion=2"); |
| 583 } | 605 } |
| 584 } | 606 } |
| 607 #endif // U_ENABLE_ISO_2022_KR_CN |
| 585 else{ | 608 else{ |
| 586 #ifdef U_ENABLE_GENERIC_ISO_2022 | 609 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 587 myConverterData->isFirstBuffer = TRUE; | 610 myConverterData->isFirstBuffer = TRUE; |
| 588 | 611 |
| 589 /* append the UTF-8 escape sequence */ | 612 /* append the UTF-8 escape sequence */ |
| 590 cnv->charErrorBufferLength = 3; | 613 cnv->charErrorBufferLength = 3; |
| 591 cnv->charErrorBuffer[0] = 0x1b; | 614 cnv->charErrorBuffer[0] = 0x1b; |
| 592 cnv->charErrorBuffer[1] = 0x25; | 615 cnv->charErrorBuffer[1] = 0x25; |
| 593 cnv->charErrorBuffer[2] = 0x42; | 616 cnv->charErrorBuffer[2] = 0x42; |
| 594 | 617 |
| (...skipping 3347 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3942 NULL, | 3965 NULL, |
| 3943 &_ISO2022CNStaticData, | 3966 &_ISO2022CNStaticData, |
| 3944 FALSE, | 3967 FALSE, |
| 3945 &_ISO2022CNImpl, | 3968 &_ISO2022CNImpl, |
| 3946 0, UCNV_MBCS_TABLE_INITIALIZER | 3969 0, UCNV_MBCS_TABLE_INITIALIZER |
| 3947 }; | 3970 }; |
| 3948 | 3971 |
| 3949 } // namespace | 3972 } // namespace |
| 3950 | 3973 |
| 3951 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 3974 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| OLD | NEW |