Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: source/common/ucnvmbcs.cpp

Issue 1141463003: Make GB18030/GBK compliant to HTML5 Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ****************************************************************************** 2 ******************************************************************************
3 * 3 *
4 * Copyright (C) 2000-2014, International Business Machines 4 * Copyright (C) 2000-2014, International Business Machines
5 * Corporation and others. All Rights Reserved. 5 * Corporation and others. All Rights Reserved.
6 * 6 *
7 ****************************************************************************** 7 ******************************************************************************
8 * file name: ucnvmbcs.cpp 8 * file name: ucnvmbcs.cpp
9 * encoding: US-ASCII 9 * encoding: US-ASCII
10 * tab size: 8 (not used) 10 * tab size: 8 (not used)
(...skipping 534 matching lines...) Expand 10 before | Expand all | Expand 10 after
545 {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)}, 545 {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)},
546 {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)}, 546 {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)},
547 {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)}, 547 {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)},
548 {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)}, 548 {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)},
549 {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)}, 549 {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)},
550 {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)}, 550 {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)},
551 {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)} 551 {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)}
552 }; 552 };
553 553
554 /* bit flag for UConverter.options indicating GB 18030 special handling */ 554 /* bit flag for UConverter.options indicating GB 18030 special handling */
555 #define _MBCS_OPTION_GB18030 0x8000 555 #define _MBCS_OPTION_GB18030_TOU 0x08000
556 #define _MBCS_OPTION_GB18030_FROMU 0x10000
556 557
557 /* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */ 558 /* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
558 #define _MBCS_OPTION_KEIS 0x01000 559 #define _MBCS_OPTION_KEIS 0x01000
559 #define _MBCS_OPTION_JEF 0x02000 560 #define _MBCS_OPTION_JEF 0x02000
560 #define _MBCS_OPTION_JIPS 0x04000 561 #define _MBCS_OPTION_JIPS 0x04000
561 562
562 #define KEIS_SO_CHAR_1 0x0A 563 #define KEIS_SO_CHAR_1 0x0A
563 #define KEIS_SO_CHAR_2 0x42 564 #define KEIS_SO_CHAR_2 0x42
564 #define KEIS_SI_CHAR_1 0x0A 565 #define KEIS_SI_CHAR_1 0x0A
565 #define KEIS_SI_CHAR_2 0x41 566 #define KEIS_SI_CHAR_2 0x41
(...skipping 496 matching lines...) Expand 10 before | Expand all | Expand 10 after
1062 UCNV_SET_FILTER_DBCS_ONLY : 1063 UCNV_SET_FILTER_DBCS_ONLY :
1063 UCNV_SET_FILTER_NONE, 1064 UCNV_SET_FILTER_NONE,
1064 pErrorCode); 1065 pErrorCode);
1065 } 1066 }
1066 1067
1067 static void 1068 static void
1068 ucnv_MBCSGetUnicodeSet(const UConverter *cnv, 1069 ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
1069 const USetAdder *sa, 1070 const USetAdder *sa,
1070 UConverterUnicodeSet which, 1071 UConverterUnicodeSet which,
1071 UErrorCode *pErrorCode) { 1072 UErrorCode *pErrorCode) {
1072 if(cnv->options&_MBCS_OPTION_GB18030) { 1073 if(cnv->options&_MBCS_OPTION_GB18030_FROMU) {
1073 sa->addRange(sa->set, 0, 0xd7ff); 1074 sa->addRange(sa->set, 0, 0xd7ff);
1074 sa->addRange(sa->set, 0xe000, 0x10ffff); 1075 sa->addRange(sa->set, 0xe000, 0x10ffff);
1075 } else { 1076 } else {
1076 ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode); 1077 ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
1077 } 1078 }
1078 } 1079 }
1079 1080
1080 /* conversion extensions for input not in the main table -------------------- */ 1081 /* conversion extensions for input not in the main table -------------------- */
1081 1082
1082 /* 1083 /*
(...skipping 29 matching lines...) Expand all
1112 cp, source, sourceLimit, 1113 cp, source, sourceLimit,
1113 (char **)target, (char *)targetLimit, 1114 (char **)target, (char *)targetLimit,
1114 offsets, sourceIndex, 1115 offsets, sourceIndex,
1115 flush, 1116 flush,
1116 pErrorCode) 1117 pErrorCode)
1117 ) { 1118 ) {
1118 return 0; /* an extension mapping handled the input */ 1119 return 0; /* an extension mapping handled the input */
1119 } 1120 }
1120 1121
1121 /* GB 18030 */ 1122 /* GB 18030 */
1122 if((cnv->options&_MBCS_OPTION_GB18030)!=0) { 1123 if((cnv->options&_MBCS_OPTION_GB18030_FROMU)!=0) {
1123 const uint32_t *range; 1124 const uint32_t *range;
1124 int32_t i; 1125 int32_t i;
1125 1126
1126 range=gb18030Ranges[0]; 1127 range=gb18030Ranges[0];
1127 for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) { 1128 for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
1128 if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) { 1129 if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
1129 /* found the Unicode code point, output the four-byte sequence for it */ 1130 /* found the Unicode code point, output the four-byte sequence for it */
1130 uint32_t linear; 1131 uint32_t linear;
1131 char bytes[4]; 1132 char bytes[4];
1132 1133
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
1177 length, (const char **)source, (const char *)sourceLimit, 1178 length, (const char **)source, (const char *)sourceLimit,
1178 target, targetLimit, 1179 target, targetLimit,
1179 offsets, sourceIndex, 1180 offsets, sourceIndex,
1180 flush, 1181 flush,
1181 pErrorCode) 1182 pErrorCode)
1182 ) { 1183 ) {
1183 return 0; /* an extension mapping handled the input */ 1184 return 0; /* an extension mapping handled the input */
1184 } 1185 }
1185 1186
1186 /* GB 18030 */ 1187 /* GB 18030 */
1187 if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) { 1188 if(length==4 && (cnv->options&_MBCS_OPTION_GB18030_TOU)!=0) {
1188 const uint32_t *range; 1189 const uint32_t *range;
1189 uint32_t linear; 1190 uint32_t linear;
1190 int32_t i; 1191 int32_t i;
1191 1192
1192 linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]); 1193 linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]);
1193 range=gb18030Ranges[0]; 1194 range=gb18030Ranges[0];
1194 for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) { 1195 for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
1195 if(range[2]<=linear && linear<=range[3]) { 1196 if(range[2]<=linear && linear<=range[3]) {
1196 /* found the sequence, output the Unicode code point for it */ 1197 /* found the sequence, output the Unicode code point for it */
1197 *pErrorCode=U_ZERO_ERROR; 1198 *pErrorCode=U_ZERO_ERROR;
(...skipping 726 matching lines...) Expand 10 before | Expand all | Expand 10 after
1924 1925
1925 /* the option does not apply, remove it */ 1926 /* the option does not apply, remove it */
1926 cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; 1927 cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
1927 } 1928 }
1928 } 1929 }
1929 } 1930 }
1930 1931
1931 if(uprv_strstr(pArgs->name, "18030")!=NULL) { 1932 if(uprv_strstr(pArgs->name, "18030")!=NULL) {
1932 if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) { 1933 if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) {
1933 /* set a flag for GB 18030 mode, which changes the callback behavior */ 1934 /* set a flag for GB 18030 mode, which changes the callback behavior */
1934 cnv->options|=_MBCS_OPTION_GB18030; 1935 #if !UCONFIG_NO_NON_HTML5_CONVERSION
1936 cnv->options|=_MBCS_OPTION_GB18030_FROMU;
1937 #endif
1938 cnv->options|=_MBCS_OPTION_GB18030_TOU;
1935 } 1939 }
1936 } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) { 1940 } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) {
1937 /* set a flag for KEIS converter, which changes the SI/SO character sequence */ 1941 /* set a flag for KEIS converter, which changes the SI/SO character sequence */
1938 cnv->options|=_MBCS_OPTION_KEIS; 1942 cnv->options|=_MBCS_OPTION_KEIS;
1939 } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) { 1943 } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) {
1940 /* set a flag for JEF converter, which changes the SI/SO character sequence */ 1944 /* set a flag for JEF converter, which changes the SI/SO character sequence */
1941 cnv->options|=_MBCS_OPTION_JEF; 1945 cnv->options|=_MBCS_OPTION_JEF;
1942 } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) { 1946 } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) {
1943 /* set a flag for JIPS converter, which changes the SI/SO character sequence */ 1947 /* set a flag for JIPS converter, which changes the SI/SO character sequence */
1944 cnv->options|=_MBCS_OPTION_JIPS; 1948 cnv->options|=_MBCS_OPTION_JIPS;
(...skipping 3758 matching lines...) Expand 10 before | Expand all | Expand 10 after
5703 return (UConverterType)UCNV_SBCS; 5707 return (UConverterType)UCNV_SBCS;
5704 } else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) { 5708 } else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
5705 return (UConverterType)UCNV_EBCDIC_STATEFUL; 5709 return (UConverterType)UCNV_EBCDIC_STATEFUL;
5706 } else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) { 5710 } else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
5707 return (UConverterType)UCNV_DBCS; 5711 return (UConverterType)UCNV_DBCS;
5708 } 5712 }
5709 return (UConverterType)UCNV_MBCS; 5713 return (UConverterType)UCNV_MBCS;
5710 } 5714 }
5711 5715
5712 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 5716 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698