patches/uconv.patch - Issue 1637683002: ICU 56 step 5: Apply the remaining local patches

Side by Side Diff: patches/uconv.patch

Issue 1637683002: ICU 56 step 5: Apply the remaining local patches (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@56post56

Patch Set: drop uconv.patch Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp

2 index 9556dd2..7b8df9b 100644

3 --- a/source/common/ucnv2022.cpp

4 +++ b/source/common/ucnv2022.cpp

5 @@ -152,7 +152,11 @@ typedef enum {

6 } StateEnum;

7

8 /* is the StateEnum charset value for a DBCS charset? */

9 +#if UCONFIG_NO_NON_HTML5_CONVERSION

10 +#define IS_JP_DBCS(cs) (JISX208==(cs))

11 +#else

12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)

13 +#endif

14

15 #define CSM(cs) ((uint16_t)1<<(cs))

16

17 @@ -165,13 +169,23 @@ typedef enum {

18 * all versions, not just JIS7 and JIS8.

19 * - ICU does not distinguish between different versions of JIS X 0208.

20 */

21 +#if UCONFIG_NO_NON_HTML5_CONVERSION

22 +enum { MAX_JA_VERSION=0 };

23 +#else

24 enum { MAX_JA_VERSION=4 };

25 +#endif

26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={

27 +/*

28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it.

29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885

30 + */

31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),

32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),

34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),

35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),

36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)

37 +#endif

38 };

39

40 typedef enum {

41 @@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {

42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

43 };

44

45 -

46 /* Type def for refactoring changeState_2022 code*/

47 typedef enum{

48 #ifdef U_ENABLE_GENERIC_ISO_2022

49 ISO_2022=0,

50 #endif

51 +#if UCONFIG_NO_NON_HTML5_CONVERSION

52 + ISO_2022_JP=1

53 +#else

54 ISO_2022_JP=1,

55 ISO_2022_KR=2,

56 ISO_2022_CN=3

57 +#endif

58 } Variant2022;

59

60 /********* ISO 2022 Converter Protos *********/

61 @@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

62 /* prevent indexing beyond jpCharsetMasks[] */

63 myConverterData->version = version = 0;

64 }

65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {

67 myConverterData->myConverterArray[ISO8859_7] =

68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);

69 }

70 +#endif

71 myConverterData->myConverterArray[JISX208] =

72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);

73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

74 if(jpCharsetMasks[version]&CSM(JISX212)) {

75 myConverterData->myConverterArray[JISX212] =

76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);

77 @@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

78 myConverterData->myConverterArray[KSC5601] =

79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);

80 }

81 +#endif

82

83 /* set the function pointers to appropriate funtions */

84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);

85 @@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0');

87 myConverterData->name[len+1]='\0';

88 }

89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&

91 (myLocale[2]=='_' || myLocale[2]=='\0'))

92 {

93 @@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");

95 }

96 }

97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION

98 else{

99 #ifdef U_ENABLE_GENERIC_ISO_2022

100 myConverterData->isFirstBuffer = TRUE;

101 @@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {

102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

103 };

104

105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

106 /************* to unicode *****************/

107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {

108 /* 0 1 2 3 4 5 6 7 8 9 */

109 @@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {

110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

112 };

113 +#endif

114

115

116 static UCNV_TableStates_2022

117 @@ -878,6 +903,7 @@ DONE:

118 }

119 break;

120 /* case SS3_STATE: not used in ISO-2022-JP-x */

121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

122 case ISO8859_1:

123 case ISO8859_7:

124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

125 @@ -887,6 +913,7 @@ DONE:

126 myData2022->toU2022State.cs[2]=(int8_t)tempState;

127 }

128 break;

129 +#endif

130 default:

131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

133 @@ -898,6 +925,7 @@ DONE:

134 }

135 }

136 break;

137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

138 case ISO_2022_CN:

139 {

140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];

141 @@ -959,6 +987,7 @@ DONE:

142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

143 }

144 break;

145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */

146

147 default:

148 *err = U_ILLEGAL_ESCAPE_SEQUENCE;

149 @@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv,

150 static const StateEnum jpCharsetPref[]={

151 ASCII,

152 JISX201,

153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

154 ISO8859_1,

155 ISO8859_7,

156 +#endif

157 JISX208,

158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

159 JISX212,

160 GB2312,

161 KSC5601,

162 +#endif

163 HWKANA_7BIT

164 };

165

166 @@ -1754,6 +1787,7 @@ getTrail:

167 g = 0;

168 }

169 break;

170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

171 case ISO8859_1:

172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {

173 targetValue = (uint32_t)sourceChar - 0x80;

174 @@ -1762,6 +1796,7 @@ getTrail:

175 g = 2;

176 }

177 break;

178 +#endif

179 case HWKANA_7BIT:

180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {

181 if(converterData->version==3) {

182 @@ -1823,6 +1858,7 @@ getTrail:

183 useFallback = FALSE;

184 }

185 break;

186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

187 case ISO8859_7:

188 /* G0 SBCS forced to 7-bit output */

189 len2 = MBCS_SINGLE_FROM_UCHAR32(

190 @@ -1837,6 +1873,7 @@ getTrail:

191 useFallback = FALSE;

192 }

193 break;

194 +#endif

195 default:

196 /* G0 DBCS */

197 len2 = MBCS_FROM_UCHAR32_ISO2022(

198 @@ -1844,6 +1881,7 @@ getTrail:

199 sourceChar, &value,

200 useFallback, MBCS_OUTPUT_2);

201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

203 if(cs0 == KSC5601) {

204 /*

205 * Check for valid bytes for the encoding scheme.

206 @@ -1855,6 +1893,7 @@ getTrail:

207 break;

208 }

209 }

210 +#endif

211 targetValue = value;

212 len = len2;

213 cs = cs0;

214 @@ -2148,6 +2187,7 @@ escape:

215 targetUniChar = mySourceChar;

216 }

217 break;

218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

219 case ISO8859_1:

220 if(mySourceChar <= 0x7f) {

221 targetUniChar = mySourceChar + 0x80;

222 @@ -2166,6 +2206,7 @@ escape:

223 /* return from a single-shift state to the previous one */

224 pToU2022State->g=pToU2022State->prevG;

225 break;

226 +#endif

227 case JISX201:

228 if(mySourceChar <= 0x7f) {

229 targetUniChar = jisx201ToU(mySourceChar);

230 @@ -2205,9 +2246,11 @@ getTrailByte:

231 } else {

232 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */

233 mySourceChar = tmpSourceChar;

234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

235 if (cs == KSC5601) {

236 tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */

237 }

238 +#endif

239 tempBuf[0] = (char)(tmpSourceChar >> 8);

240 tempBuf[1] = (char)(tmpSourceChar);

241 }

242 @@ -2269,6 +2312,7 @@ endloop:

243 }

244

245

246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

247 /***************************************************************

248 * Rules for ISO-2022-KR encoding

249 * i) The KSC5601 designator sequence should appear only once in a file,

250 @@ -3412,6 +3456,7 @@ endloop:

251 args->target = myTarget;

252 args->source = mySource;

253 }

254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */

255

256 static void

257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {

258 @@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

259 /* include JIS X 0201 which is hardcoded */

260 sa->add(sa->set, 0xa5);

261 sa->add(sa->set, 0x203e);

262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {

264 /* include Latin-1 for some variants of JP */

265 sa->addRange(sa->set, 0, 0xff);

266 @@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

267 /* include ASCII for JP */

268 sa->addRange(sa->set, 0, 0x7f);

269 }

270 +#else

271 + /* include ASCII for JP */

272 + sa->addRange(sa->set, 0, 0x7f);

273 +#endif

274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

275 /*

276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0

277 @@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

278 sa->addRange(sa->set, HWKANA_START, HWKANA_END);

279 }

280 break;

281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

282 case 'c':

283 case 'z':

284 /* include ASCII for CN */

285 @@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

286 cnvData->currentConverter, sa, which, pErrorCode);

287 /* the loop over myConverterArray[] will simply not find another converter */

288 break;

289 +#endif

290 default:

291 break;

292 }

293 @@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {

295 UConverterSetFilter filter;

296 if(cnvData->myConverterArray[i]!=NULL) {

297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&

298 - cnvData->version==0 && i==CNS_11643

299 - ) {

300 + if(cnvData->locale[0]=='j' && i==JISX208) {

301 + /*

302 + * Only add code points that map to Shift-JIS codes

303 + * corresponding to JIS X 0208.

304 + */

305 + filter=UCNV_SET_FILTER_SJIS;

306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&

308 + cnvData->version==0 && i==CNS_11643) {

309 /*

310 * Version-specific for CN:

311 * CN version 0 does not map CNS planes 3..7 although

312 @@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

313 * The two versions create different Unicode sets.

314 */

315 filter=UCNV_SET_FILTER_2022_CN;

316 - } else if(cnvData->locale[0]=='j' && i==JISX208) {

317 - /*

318 - * Only add code points that map to Shift-JIS codes

319 - * corresponding to JIS X 0208.

320 - */

321 - filter=UCNV_SET_FILTER_SJIS;

322 } else if(i==KSC5601) {

323 /*

324 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)

325 * are broader than GR94.

326 */

327 filter=UCNV_SET_FILTER_GR94DBCS;

328 +#endif

329 } else {

330 filter=UCNV_SET_FILTER_NONE;

331 }

332 @@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={

333

334 } // namespace

335

336 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

337 /*********** KR *************/

338 static const UConverterImpl _ISO2022KRImpl={

339 UCNV_ISO_2022,

340 @@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={

341 };

342

343 } // namespace

344 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */

345

346 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

347 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp

348 index 4940310..047f18a 100644

349 --- a/source/common/ucnv_bld.cpp

350 +++ b/source/common/ucnv_bld.cpp

351 @@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={

352

353 #if UCONFIG_NO_LEGACY_CONVERSION

354 NULL,

355 +#else

356 + &_ISO2022Data,

357 +#endif

358 +

359 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION

360 NULL, NULL, NULL, NULL, NULL, NULL,

361 NULL, NULL, NULL, NULL, NULL, NULL,

362 NULL,

363 #else

364 - &_ISO2022Data,

365 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,

366 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,

367 &_HZData,

368 #endif

369

370 +#if UCONFIG_NO_NON_HTML5_CONVERSION

371 + NULL,

372 +#else

373 &_SCSUData,

374 +#endif

375

376 -#if UCONFIG_NO_LEGACY_CONVERSION

377 +

378 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION

379 NULL,

380 #else

381 &_ISCIIData,

382 #endif

383

384 &_ASCIIData,

385 +#if UCONFIG_NO_NON_HTML5_CONVERSION

386 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL,

387 +#else

388 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,

389 +#endif

390

391 -#if UCONFIG_NO_LEGACY_CONVERSION

392 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION

393 NULL,

394 #else

395 &_CompoundTextData

396 @@ -105,18 +118,24 @@ static struct {

397 const char *name;

398 const UConverterType type;

399 } const cnvNameType[] = {

400 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

401 { "bocu1", UCNV_BOCU1 },

402 { "cesu8", UCNV_CESU8 },

403 -#if !UCONFIG_NO_LEGACY_CONVERSION

404 +#endif

405 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

406 { "hz",UCNV_HZ },

407 #endif

408 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

409 { "imapmailboxname", UCNV_IMAP_MAILBOX },

410 -#if !UCONFIG_NO_LEGACY_CONVERSION

411 +#endif

412 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

413 { "iscii", UCNV_ISCII },

414 +#endif

415 +#if !UCONFIG_NO_LEGACY_CONVERSION

416 { "iso2022", UCNV_ISO_2022 },

417 #endif

418 { "iso88591", UCNV_LATIN_1 },

419 -#if !UCONFIG_NO_LEGACY_CONVERSION

420 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

421 { "lmbcs1", UCNV_LMBCS_1 },

422 { "lmbcs11",UCNV_LMBCS_11 },

423 { "lmbcs16",UCNV_LMBCS_16 },

424 @@ -130,7 +149,9 @@ static struct {

425 { "lmbcs6", UCNV_LMBCS_6 },

426 { "lmbcs8", UCNV_LMBCS_8 },

427 #endif

428 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

429 { "scsu", UCNV_SCSU },

430 +#endif

431 { "usascii", UCNV_US_ASCII },

432 { "utf16", UCNV_UTF16 },

433 { "utf16be", UCNV_UTF16_BigEndian },

434 @@ -152,9 +173,13 @@ static struct {

435 { "utf32oppositeendian", UCNV_UTF32_BigEndian },

436 { "utf32platformendian", UCNV_UTF32_LittleEndian },

437 #endif

438 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

439 { "utf7", UCNV_UTF7 },

440 +#endif

441 { "utf8", UCNV_UTF8 },

442 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

443 { "x11compoundtext", UCNV_COMPOUND_TEXT}

444 +#endif

445 };

446

447

448 diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h

449 index 402e2c9..5fad446 100644

450 --- a/source/common/ucnv_cnv.h

451 +++ b/source/common/ucnv_cnv.h

452 @@ -256,11 +256,15 @@ struct UConverterImpl {

453 extern const UConverterSharedData

454 _MBCSData, _Latin1Data,

455 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,

456 - _ISO2022Data,

457 + _ISO2022Data,

458 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

459 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,

460 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,

461 _HZData,_ISCIIData, _SCSUData, _ASCIIData,

462 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;

463 +#else

464 + _ASCIIData, _UTF16Data, _UTF32Data;

465 +#endif

466

467 U_CDECL_END

468

469 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c

470 index ec0e9c2..e723fa6 100644

471 --- a/source/common/ucnv_ct.c

472 +++ b/source/common/ucnv_ct.c

473 @@ -14,7 +14,7 @@

474

475 #include "unicode/utypes.h"

476

477 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

478 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

479

480 #include "unicode/ucnv.h"

481 #include "unicode/uset.h"

482 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c

483 index 1d921dd..a4fccee 100644

484 --- a/source/common/ucnv_lmb.c

485 +++ b/source/common/ucnv_lmb.c

486 @@ -25,7 +25,7 @@

487

488 #include "unicode/utypes.h"

489

490 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

491 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

492

493 #include "unicode/ucnv_err.h"

494 #include "unicode/ucnv.h"

495 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c

496 index 42943f4..6466b87 100644

497 --- a/source/common/ucnv_u7.c

498 +++ b/source/common/ucnv_u7.c

499 @@ -16,7 +16,7 @@

500

501 #include "unicode/utypes.h"

502

503 -#if !UCONFIG_NO_CONVERSION

504 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

505

506 #include "unicode/ucnv.h"

507 #include "ucnv_bld.h"

508 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c

509 index 8ee9fe5..24205f5 100644

510 --- a/source/common/ucnv_u8.c

511 +++ b/source/common/ucnv_u8.c

512 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {

513 static const uint32_t

514 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };

515

516 +static UBool hasCESU8Data(const UConverter *cnv)

517 +{

518 +#if UCONFIG_NO_NON_HTML5_CONVERSION

519 + return FALSE;

520 +#else

521 + return (UBool)(cnv->sharedData == &_CESU8Data);

522 +#endif

523 +}

524 +

525 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,

526 UErrorCode * err)

527 {

528 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,

529 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;

530 const UChar *targetLimit = args->targetLimit;

531 unsigned char *toUBytes = cnv->toUBytes;

532 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);

533 + UBool isCESU8 = hasCESU8Data(cnv);

534 uint32_t ch, ch2 = 0;

535 int32_t i, inBytes;

536 -

537 +

538 /* Restore size of current sequence */

539 if (cnv->toUnicodeStatus && myTarget < targetLimit)

540 {

541 @@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,

542 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;

543 const UChar *targetLimit = args->targetLimit;

544 unsigned char *toUBytes = cnv->toUBytes;

545 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);

546 + UBool isCESU8 = hasCESU8Data(cnv);

547 uint32_t ch, ch2 = 0;

548 int32_t i, inBytes;

549

550 @@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,

551 UChar32 ch;

552 uint8_t tempBuf[4];

553 int32_t indexToWrite;

554 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);

555 + UBool isNotCESU8 = !hasCESU8Data(cnv);

556

557 if (cnv->fromUChar32 && myTarget < targetLimit)

558 {

559 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar

560 int32_t offsetNum, nextSourceIndex;

561 int32_t indexToWrite;

562 uint8_t tempBuf[4];

563 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);

564 + UBool isNotCESU8 = !hasCESU8Data(cnv);

565

566 if (cnv->fromUChar32 && myTarget < targetLimit)

567 {

568 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp

569 index b97d666..281d6d9 100644

570 --- a/source/common/ucnvbocu.cpp

571 +++ b/source/common/ucnvbocu.cpp

572 @@ -19,7 +19,7 @@

573

574 #include "unicode/utypes.h"

575

576 -#if !UCONFIG_NO_CONVERSION

577 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

578

579 #include "unicode/ucnv.h"

580 #include "unicode/ucnv_cb.h"

581 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c

582 index 3760c39..51825e2 100644

583 --- a/source/common/ucnvhz.c

584 +++ b/source/common/ucnvhz.c

585 @@ -16,7 +16,7 @@

586

587 #include "unicode/utypes.h"

588

589 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

590 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

591

592 #include "cmemory.h"

593 #include "unicode/ucnv.h"

594 @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={

595 0

596 };

597

598 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

599 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */

600 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c

601 index fe61d40..16fd0a3 100644

602 --- a/source/common/ucnvisci.c

603 +++ b/source/common/ucnvisci.c

604 @@ -17,7 +17,7 @@

605

606 #include "unicode/utypes.h"

607

608 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

609 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

610

611 #include "unicode/ucnv.h"

612 #include "unicode/ucnv_cb.h"

613 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c

614 index c6e96e1..a6f8c9e 100644

615 --- a/source/common/ucnvscsu.c

616 +++ b/source/common/ucnvscsu.c

617 @@ -21,7 +21,7 @@

618

619 #include "unicode/utypes.h"

620

621 -#if !UCONFIG_NO_CONVERSION

622 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

623

624 #include "unicode/ucnv.h"

625 #include "unicode/ucnv_cb.h"

626 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h

627 index ed073b6..8df56e6 100644

628 --- a/source/common/unicode/uconfig.h

629 +++ b/source/common/unicode/uconfig.h

630 @@ -270,6 +270,14 @@

631 #endif

632

633 /**

634 + * This switch turns off all the converters NOT listed in

635 + * the encoding standard : http://encoding.spec.whatwg.org

636 + */

637 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION

638 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0

639 +#endif

640 +

641 +/**

642 * \def UCONFIG_NO_LEGACY_CONVERSION

643 * This switch turns off all converters except for

644 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)

645 diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h

646 index a817262..89becca 100644

647 --- a/source/common/unicode/urename.h

648 +++ b/source/common/unicode/urename.h

649 @@ -73,12 +73,14 @@

650 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)

651 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)

652 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)

653 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

654 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)

655 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)

656 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)

657 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)

658 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)

659 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)

660 +#endif

661 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)

662 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)

663 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)

664 @@ -94,14 +96,18 @@

665 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)

666 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)

667 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)

668 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

669 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)

670 +#endif

671 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)

672 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)

673 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)

674 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)

675 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)

676 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)

677 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

678 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)

679 +#endif

680 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)

681 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)

682 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)

683 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp

684 index 3efbd49..ba5b18c 100644

685 --- a/source/i18n/csdetect.cpp

686 +++ b/source/i18n/csdetect.cpp

687 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {

688 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),

689

690 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),

691 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

692 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),

693 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),

694

695 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {

696 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),

697 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),

698 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)

699 +#endif

700 };

701 int32_t rCount = ARRAY_SIZE(tempArray);

702

703 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp

704 index 3db0bc9..be3eafa 100644

705 --- a/source/i18n/csr2022.cpp

706 +++ b/source/i18n/csr2022.cpp

707 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {

708 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7

709 };

710

711 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

712 static const uint8_t escapeSequences_2022KR[][5] = {

713 {0x1b, 0x24, 0x29, 0x43, 0x00}

714 };

715 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {

716 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2

717 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3

718 };

719 +#endif

720

721 CharsetRecog_2022JP::~CharsetRecog_2022JP() {}

722

723 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const

724 return (confidence > 0);

725 }

726

727 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

728 CharsetRecog_2022KR::~CharsetRecog_2022KR() {}

729

730 const char *CharsetRecog_2022KR::getName() const {

731 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const

732 results->set(textIn, this, confidence);

733 return (confidence > 0);

734 }

735 +#endif

736

737 CharsetRecog_2022::~CharsetRecog_2022() {

738 // nothing to do

739 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h

740 index 2ac2b87..dad22c7 100644

741 --- a/source/i18n/csr2022.h

742 +++ b/source/i18n/csr2022.h

743 @@ -65,6 +65,7 @@ public:

744 UBool match(InputText *textIn, CharsetMatch *results) const;

745 };

746

747 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

748 class CharsetRecog_2022KR :public CharsetRecog_2022 {

749 public:

750 virtual ~CharsetRecog_2022KR();

751 @@ -84,6 +85,7 @@ public:

752

753 UBool match(InputText *textIn, CharsetMatch *results) const;

754 };

755 +#endif

756

757 U_NAMESPACE_END

758

759 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp

760 index d03367c..7b70dc1 100644

761 --- a/source/i18n/csrsbcs.cpp

762 +++ b/source/i18n/csrsbcs.cpp

763 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)

764 return (int32_t) (rawPercent * 300.0);

765 }

766

767 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

768 static const uint8_t unshapeMap_IBM420[] = {

769 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */

770 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,

771 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)

772 }

773 }

774 }

775 +#endif

776

777 CharsetRecog_sbcs::CharsetRecog_sbcs()

778 {

779 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {

780 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,

781 };

782

783 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

784 static const int32_t ngrams_IBM424_he_rtl[] = {

785 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,

786 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,

787 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {

788 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,

789 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,

790 };

791 +#endif

792

793 //ISO-8859-1,2,5,6,7,8,9 Ngrams

794

795 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const

796 return (confidence > 0);

797 }

798

799 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

800 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()

801 {

802 // nothing to do

803 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results

804 results->set(textIn, this, confidence);

805 return (confidence > 0);

806 }

807 +#endif

808

809 U_NAMESPACE_END

810 #endif

811 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h

812 index 2579c02..7789f9b 100644

813 --- a/source/i18n/csrsbcs.h

814 +++ b/source/i18n/csrsbcs.h

815 @@ -50,6 +50,7 @@ public:

816

817 };

818

819 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

820 class NGramParser_IBM420 : public NGramParser

821 {

822 private:

823 @@ -61,6 +62,7 @@ private:

824 public:

825 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);

826 };

827 +#endif

828

829

830 class CharsetRecog_sbcs : public CharsetRecognizer

831 @@ -229,6 +231,7 @@ public:

832 virtual UBool match(InputText *det, CharsetMatch *results) const;

833 };

834

835 +#if !UCONFIG_NO_NON_HTML5_CONVERSION

836 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs

837 {

838 public:

839 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {

840

841 virtual UBool match(InputText *det, CharsetMatch *results) const;

842 };

843 +#endif

844

845 U_NAMESPACE_END

846

OLD	NEW

« no previous file with comments | « patches/linebrk.patch ('k') | patches/vscomp.patch » ('j') | no next file with comments »