third_party/expat/files/lib/xmltok.c - Issue 2761253002: Update expat to 2.2.0 to fix CVE vulnerability.

Side by Side Diff: third_party/expat/files/lib/xmltok.c

Issue 2761253002: Update expat to 2.2.0 to fix CVE vulnerability. (Closed)

Patch Set: update README.chromium Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd	1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd

2 See the file COPYING for copying permission.	2 See the file COPYING for copying permission.

3 */	3 */

4	4

5 #include <stddef.h>	5 #include <stddef.h>

6	6

7 #ifdef COMPILED_FROM_DSP	7 #ifdef WIN32

8 #include "winconfig.h"	8 #include "winconfig.h"

9 #elif defined(MACOS_CLASSIC)	9 #elif defined(MACOS_CLASSIC)

10 #include "macconfig.h"	10 #include "macconfig.h"

11 #elif defined(__amigaos__)	11 #elif defined(__amigaos__)

12 #include "amigaconfig.h"	12 #include "amigaconfig.h"

13 #elif defined(__WATCOMC__)	13 #elif defined(__WATCOMC__)

14 #include "watcomconfig.h"	14 #include "watcomconfig.h"

15 #else	15 #else

16 #ifdef HAVE_EXPAT_CONFIG_H	16 #ifdef HAVE_EXPAT_CONFIG_H

17 #include <expat_config.h>	17 #include <expat_config.h>

18 #endif	18 #endif

19 #endif /* ndef COMPILED_FROM_DSP */	19 #endif /* ndef WIN32 */

20	20

21 #include "expat_external.h"	21 #include "expat_external.h"

22 #include "internal.h"	22 #include "internal.h"

23 #include "xmltok.h"	23 #include "xmltok.h"

24 #include "nametab.h"	24 #include "nametab.h"

25	25

26 #ifdef XML_DTD	26 #ifdef XML_DTD

27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)	27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)

28 #else	28 #else

29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */	29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */

30 #endif	30 #endif

31	31

32 #define VTABLE1 \	32 #define VTABLE1 \

33 { PREFIX(prologTok), PREFIX(contentTok), \	33 { PREFIX(prologTok), PREFIX(contentTok), \

34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \	34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \

35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \	35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \

36 PREFIX(sameName), \	36 PREFIX(sameName), \

37 PREFIX(nameMatchesAscii), \	37 PREFIX(nameMatchesAscii), \

38 PREFIX(nameLength), \	38 PREFIX(nameLength), \

39 PREFIX(skipS), \	39 PREFIX(skipS), \

40 PREFIX(getAtts), \	40 PREFIX(getAtts), \

41 PREFIX(charRefNumber), \	41 PREFIX(charRefNumber), \

42 PREFIX(predefinedEntityName), \	42 PREFIX(predefinedEntityName), \

43 PREFIX(updatePosition), \	43 PREFIX(updatePosition), \

44 PREFIX(isPublicId)	44 PREFIX(isPublicId)

45	45

46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)	46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)

47	47

48 #define UCS2_GET_NAMING(pages, hi, lo) \	48 #define UCS2_GET_NAMING(pages, hi, lo) \

49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))	49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))

50	50

51 /* A 2 byte UTF-8 representation splits the characters 11 bits between	51 /* A 2 byte UTF-8 representation splits the characters 11 bits between

52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into	52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into

53 pages, 3 bits to add to that index and 5 bits to generate the mask.	53 pages, 3 bits to add to that index and 5 bits to generate the mask.

54 */	54 */

55 #define UTF8_GET_NAMING2(pages, byte) \	55 #define UTF8_GET_NAMING2(pages, byte) \

56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \	56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \

57 + ((((byte)[0]) & 3) << 1) \	57 + ((((byte)[0]) & 3) << 1) \

58 + ((((byte)[1]) >> 5) & 1)] \	58 + ((((byte)[1]) >> 5) & 1)] \

59 & (1 << (((byte)[1]) & 0x1F)))	59 & (1u << (((byte)[1]) & 0x1F)))

60	60

61 /* A 3 byte UTF-8 representation splits the characters 16 bits between	61 /* A 3 byte UTF-8 representation splits the characters 16 bits between

62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index	62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index

63 into pages, 3 bits to add to that index and 5 bits to generate the	63 into pages, 3 bits to add to that index and 5 bits to generate the

64 mask.	64 mask.

65 */	65 */

66 #define UTF8_GET_NAMING3(pages, byte) \	66 #define UTF8_GET_NAMING3(pages, byte) \

67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \	67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \

68 + ((((byte)[1]) >> 2) & 0xF)] \	68 + ((((byte)[1]) >> 2) & 0xF)] \

69 << 3) \	69 << 3) \

70 + ((((byte)[1]) & 3) << 1) \	70 + ((((byte)[1]) & 3) << 1) \

71 + ((((byte)[2]) >> 5) & 1)] \	71 + ((((byte)[2]) >> 5) & 1)] \

72 & (1 << (((byte)[2]) & 0x1F)))	72 & (1u << (((byte)[2]) & 0x1F)))

73	73

74 #define UTF8_GET_NAMING(pages, p, n) \	74 #define UTF8_GET_NAMING(pages, p, n) \

75 ((n) == 2 \	75 ((n) == 2 \

76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \	76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \

77 : ((n) == 3 \	77 : ((n) == 3 \

78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \	78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \

79 : 0))	79 : 0))

80	80

81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B	81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B

82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/	82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
115 || \	115 || \

116 ((*p) == 0xF0 \	116 ((*p) == 0xF0 \

117 ? \	117 ? \

118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \	118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \

119 : \	119 : \

120 ((p)[1] & 0x80) == 0 \	120 ((p)[1] & 0x80) == 0 \

121 || \	121 || \

122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))	122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))

123	123

124 static int PTRFASTCALL	124 static int PTRFASTCALL

125 isNever(const ENCODING *enc, const char *p)	125 isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))

126 {	126 {

127 return 0;	127 return 0;

128 }	128 }

129	129

130 static int PTRFASTCALL	130 static int PTRFASTCALL

131 utf8_isName2(const ENCODING *enc, const char *p)	131 utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)

132 {	132 {

133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);	133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);

134 }	134 }

135	135

136 static int PTRFASTCALL	136 static int PTRFASTCALL

137 utf8_isName3(const ENCODING *enc, const char *p)	137 utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)

138 {	138 {

139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);	139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);

140 }	140 }

141	141

142 #define utf8_isName4 isNever	142 #define utf8_isName4 isNever

143	143

144 static int PTRFASTCALL	144 static int PTRFASTCALL

145 utf8_isNmstrt2(const ENCODING *enc, const char *p)	145 utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)

146 {	146 {

147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);	147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);

148 }	148 }

149	149

150 static int PTRFASTCALL	150 static int PTRFASTCALL

151 utf8_isNmstrt3(const ENCODING *enc, const char *p)	151 utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)

152 {	152 {

153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);	153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);

154 }	154 }

155	155

156 #define utf8_isNmstrt4 isNever	156 #define utf8_isNmstrt4 isNever

157	157

158 static int PTRFASTCALL	158 static int PTRFASTCALL

159 utf8_isInvalid2(const ENCODING *enc, const char *p)	159 utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)

160 {	160 {

161 return UTF8_INVALID2((const unsigned char *)p);	161 return UTF8_INVALID2((const unsigned char *)p);

162 }	162 }

163	163

164 static int PTRFASTCALL	164 static int PTRFASTCALL

165 utf8_isInvalid3(const ENCODING *enc, const char *p)	165 utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)

166 {	166 {

167 return UTF8_INVALID3((const unsigned char *)p);	167 return UTF8_INVALID3((const unsigned char *)p);

168 }	168 }

169	169

170 static int PTRFASTCALL	170 static int PTRFASTCALL

171 utf8_isInvalid4(const ENCODING *enc, const char *p)	171 utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)

172 {	172 {

173 return UTF8_INVALID4((const unsigned char *)p);	173 return UTF8_INVALID4((const unsigned char *)p);

174 }	174 }

175	175

176 struct normal_encoding {	176 struct normal_encoding {

177 ENCODING enc;	177 ENCODING enc;

178 unsigned char type[256];	178 unsigned char type[256];

179 #ifdef XML_MIN_SIZE	179 #ifdef XML_MIN_SIZE

180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *);	180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *);

181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);	181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
215 E ## isName2, \	215 E ## isName2, \

216 E ## isName3, \	216 E ## isName3, \

217 E ## isName4, \	217 E ## isName4, \

218 E ## isNmstrt2, \	218 E ## isNmstrt2, \

219 E ## isNmstrt3, \	219 E ## isNmstrt3, \

220 E ## isNmstrt4, \	220 E ## isNmstrt4, \

221 E ## isInvalid2, \	221 E ## isInvalid2, \

222 E ## isInvalid3, \	222 E ## isInvalid3, \

223 E ## isInvalid4	223 E ## isInvalid4

224	224

	225 #define NULL_VTABLE \

	226 /* isName2 */ NULL, \

	227 /* isName3 */ NULL, \

	228 /* isName4 */ NULL, \

	229 /* isNmstrt2 */ NULL, \

	230 /* isNmstrt3 */ NULL, \

	231 /* isNmstrt4 */ NULL, \

	232 /* isInvalid2 */ NULL, \

	233 /* isInvalid3 */ NULL, \

	234 /* isInvalid4 */ NULL

	235

225 static int FASTCALL checkCharRefNumber(int);	236 static int FASTCALL checkCharRefNumber(int);

226	237

227 #include "xmltok_impl.h"	238 #include "xmltok_impl.h"

228 #include "ascii.h"	239 #include "ascii.h"

229	240

230 #ifdef XML_MIN_SIZE	241 #ifdef XML_MIN_SIZE

231 #define sb_isNameMin isNever	242 #define sb_isNameMin isNever

232 #define sb_isNmstrtMin isNever	243 #define sb_isNmstrtMin isNever

233 #endif	244 #endif

234	245

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
311 #undef IS_NMSTRT_CHAR_MINBPC	322 #undef IS_NMSTRT_CHAR_MINBPC

312 #undef IS_INVALID_CHAR	323 #undef IS_INVALID_CHAR

313	324

314 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */	325 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */

315 UTF8_cval1 = 0x00,	326 UTF8_cval1 = 0x00,

316 UTF8_cval2 = 0xc0,	327 UTF8_cval2 = 0xc0,

317 UTF8_cval3 = 0xe0,	328 UTF8_cval3 = 0xe0,

318 UTF8_cval4 = 0xf0	329 UTF8_cval4 = 0xf0

319 };	330 };

320	331

321 static void PTRCALL	332 void

322 utf8_toUtf8(const ENCODING *enc,	333 align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)

	334 {

	335 const char * fromLim = *fromLimRef;

	336 size_t walked = 0;

	337 for (; fromLim > from; fromLim--, walked++) {

	338 const unsigned char prev = (unsigned char)fromLim[-1];

	339 if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */

	340 if (walked + 1 >= 4) {

	341 fromLim += 4 - 1;

	342 break;

	343 } else {

	344 walked = 0;

	345 }

	346 } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */

	347 if (walked + 1 >= 3) {

	348 fromLim += 3 - 1;

	349 break;

	350 } else {

	351 walked = 0;

	352 }

	353 } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */

	354 if (walked + 1 >= 2) {

	355 fromLim += 2 - 1;

	356 break;

	357 } else {

	358 walked = 0;

	359 }

	360 } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */

	361 break;

	362 }

	363 }

	364 *fromLimRef = fromLim;

	365 }

	366

	367 static enum XML_Convert_Result PTRCALL

	368 utf8_toUtf8(const ENCODING *UNUSED_P(enc),

323 const char **fromP, const char *fromLim,	369 const char **fromP, const char *fromLim,

324 char **toP, const char *toLim)	370 char **toP, const char *toLim)

325 {	371 {

	372 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;

326 char *to;	373 char *to;

327 const char *from;	374 const char *from;

328 if (fromLim - *fromP > toLim - *toP) {	375 if (fromLim - *fromP > toLim - *toP) {

329 /* Avoid copying partial characters. */	376 /* Avoid copying partial characters. */

330 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)	377 res = XML_CONVERT_OUTPUT_EXHAUSTED;

331 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)	378 fromLim = *fromP + (toLim - *toP);

332 break;	379 align_limit_to_full_utf8_characters(*fromP, &fromLim);

333 }	380 }

334 for (to = *toP, from = *fromP; from != fromLim; from++, to++)	381 for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)

335 *to = *from;	382 *to = *from;

336 *fromP = from;	383 *fromP = from;

337 *toP = to;	384 *toP = to;

	385

	386 if ((to == toLim) && (from < fromLim))

	387 return XML_CONVERT_OUTPUT_EXHAUSTED;

	388 else

	389 return res;

338 }	390 }

339	391

340 static void PTRCALL	392 static enum XML_Convert_Result PTRCALL

341 utf8_toUtf16(const ENCODING *enc,	393 utf8_toUtf16(const ENCODING *enc,

342 const char **fromP, const char *fromLim,	394 const char **fromP, const char *fromLim,

343 unsigned short **toP, const unsigned short *toLim)	395 unsigned short **toP, const unsigned short *toLim)

344 {	396 {

	397 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;

345 unsigned short *to = *toP;	398 unsigned short *to = *toP;

346 const char *from = *fromP;	399 const char *from = *fromP;

347 while (from != fromLim && to != toLim) {	400 while (from < fromLim && to < toLim) {

348 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {	401 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {

349 case BT_LEAD2:	402 case BT_LEAD2:

	403 if (fromLim - from < 2) {

	404 res = XML_CONVERT_INPUT_INCOMPLETE;

	405 goto after;

	406 }

350 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));	407 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));

351 from += 2;	408 from += 2;

352 break;	409 break;

353 case BT_LEAD3:	410 case BT_LEAD3:

	411 if (fromLim - from < 3) {

	412 res = XML_CONVERT_INPUT_INCOMPLETE;

	413 goto after;

	414 }

354 *to++ = (unsigned short)(((from[0] & 0xf) << 12)	415 *to++ = (unsigned short)(((from[0] & 0xf) << 12)

355 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));	416 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));

356 from += 3;	417 from += 3;

357 break;	418 break;

358 case BT_LEAD4:	419 case BT_LEAD4:

359 {	420 {

360 unsigned long n;	421 unsigned long n;

361 if (to + 1 == toLim)	422 if (toLim - to < 2) {

	423 res = XML_CONVERT_OUTPUT_EXHAUSTED;

362 goto after;	424 goto after;

	425 }

	426 if (fromLim - from < 4) {

	427 res = XML_CONVERT_INPUT_INCOMPLETE;

	428 goto after;

	429 }

363 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)	430 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)

364 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);	431 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);

365 n -= 0x10000;	432 n -= 0x10000;

366 to[0] = (unsigned short)((n >> 10) | 0xD800);	433 to[0] = (unsigned short)((n >> 10) | 0xD800);

367 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);	434 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);

368 to += 2;	435 to += 2;

369 from += 4;	436 from += 4;

370 }	437 }

371 break;	438 break;

372 default:	439 default:

373 *to++ = *from++;	440 *to++ = *from++;

374 break;	441 break;

375 }	442 }

376 }	443 }

	444 if (from < fromLim)

	445 res = XML_CONVERT_OUTPUT_EXHAUSTED;

377 after:	446 after:

378 *fromP = from;	447 *fromP = from;

379 *toP = to;	448 *toP = to;

	449 return res;

380 }	450 }

381	451

382 #ifdef XML_NS	452 #ifdef XML_NS

383 static const struct normal_encoding utf8_encoding_ns = {	453 static const struct normal_encoding utf8_encoding_ns = {

384 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },	454 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },

385 {	455 {

386 #include "asciitab.h"	456 #include "asciitab.h"

387 #include "utf8tab.h"	457 #include "utf8tab.h"

388 },	458 },

389 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)	459 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)

(...skipping 28 matching lines...) Expand all Loading...
418 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },	488 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },

419 {	489 {

420 #define BT_COLON BT_NMSTRT	490 #define BT_COLON BT_NMSTRT

421 #include "iasciitab.h"	491 #include "iasciitab.h"

422 #undef BT_COLON	492 #undef BT_COLON

423 #include "utf8tab.h"	493 #include "utf8tab.h"

424 },	494 },

425 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)	495 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)

426 };	496 };

427	497

428 static void PTRCALL	498 static enum XML_Convert_Result PTRCALL

429 latin1_toUtf8(const ENCODING *enc,	499 latin1_toUtf8(const ENCODING *UNUSED_P(enc),

430 const char *fromP, const char fromLim,	500 const char *fromP, const char fromLim,

431 char *toP, const char toLim)	501 char *toP, const char toLim)

432 {	502 {

433 for (;;) {	503 for (;;) {

434 unsigned char c;	504 unsigned char c;

435 if (*fromP == fromLim)	505 if (*fromP == fromLim)

436 break;	506 return XML_CONVERT_COMPLETED;

437 c = (unsigned char)**fromP;	507 c = (unsigned char)**fromP;

438 if (c & 0x80) {	508 if (c & 0x80) {

439 if (toLim - *toP < 2)	509 if (toLim - *toP < 2)

440 break;	510 return XML_CONVERT_OUTPUT_EXHAUSTED;

441 (toP)++ = (char)((c >> 6) \| UTF8_cval2);	511 (toP)++ = (char)((c >> 6) \| UTF8_cval2);

442 (toP)++ = (char)((c & 0x3f) \| 0x80);	512 (toP)++ = (char)((c & 0x3f) \| 0x80);

443 (*fromP)++;	513 (*fromP)++;

444 }	514 }

445 else {	515 else {

446 if (*toP == toLim)	516 if (*toP == toLim)

447 break;	517 return XML_CONVERT_OUTPUT_EXHAUSTED;

448 (toP)++ = (fromP)++;	518 (toP)++ = (fromP)++;

449 }	519 }

450 }	520 }

451 }	521 }

452	522

453 static void PTRCALL	523 static enum XML_Convert_Result PTRCALL

454 latin1_toUtf16(const ENCODING *enc,	524 latin1_toUtf16(const ENCODING *UNUSED_P(enc),

455 const char *fromP, const char fromLim,	525 const char *fromP, const char fromLim,

456 unsigned short *toP, const unsigned short toLim)	526 unsigned short *toP, const unsigned short toLim)

457 {	527 {

458 while (fromP != fromLim && toP != toLim)	528 while (fromP < fromLim && toP < toLim)

459 (toP)++ = (unsigned char)(fromP)++;	529 (toP)++ = (unsigned char)(fromP)++;

	530

	531 if ((toP == toLim) && (fromP < fromLim))

	532 return XML_CONVERT_OUTPUT_EXHAUSTED;

	533 else

	534 return XML_CONVERT_COMPLETED;

460 }	535 }

461	536

462 #ifdef XML_NS	537 #ifdef XML_NS

463	538

464 static const struct normal_encoding latin1_encoding_ns = {	539 static const struct normal_encoding latin1_encoding_ns = {

465 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },	540 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },

466 {	541 {

467 #include "asciitab.h"	542 #include "asciitab.h"

468 #include "latin1tab.h"	543 #include "latin1tab.h"

469 },	544 },

470 STANDARD_VTABLE(sb_)	545 STANDARD_VTABLE(sb_) NULL_VTABLE

471 };	546 };

472	547

473 #endif	548 #endif

474	549

475 static const struct normal_encoding latin1_encoding = {	550 static const struct normal_encoding latin1_encoding = {

476 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },	551 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },

477 {	552 {

478 #define BT_COLON BT_NMSTRT	553 #define BT_COLON BT_NMSTRT

479 #include "asciitab.h"	554 #include "asciitab.h"

480 #undef BT_COLON	555 #undef BT_COLON

481 #include "latin1tab.h"	556 #include "latin1tab.h"

482 },	557 },

483 STANDARD_VTABLE(sb_)	558 STANDARD_VTABLE(sb_) NULL_VTABLE

484 };	559 };

485	560

486 static void PTRCALL	561 static enum XML_Convert_Result PTRCALL

487 ascii_toUtf8(const ENCODING *enc,	562 ascii_toUtf8(const ENCODING *UNUSED_P(enc),

488 const char *fromP, const char fromLim,	563 const char *fromP, const char fromLim,

489 char *toP, const char toLim)	564 char *toP, const char toLim)

490 {	565 {

491 while (fromP != fromLim && toP != toLim)	566 while (fromP < fromLim && toP < toLim)

492 (toP)++ = (fromP)++;	567 (toP)++ = (fromP)++;

	568

	569 if ((toP == toLim) && (fromP < fromLim))

	570 return XML_CONVERT_OUTPUT_EXHAUSTED;

	571 else

	572 return XML_CONVERT_COMPLETED;

493 }	573 }

494	574

495 #ifdef XML_NS	575 #ifdef XML_NS

496	576

497 static const struct normal_encoding ascii_encoding_ns = {	577 static const struct normal_encoding ascii_encoding_ns = {

498 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },	578 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },

499 {	579 {

500 #include "asciitab.h"	580 #include "asciitab.h"

501 /* BT_NONXML == 0 */	581 /* BT_NONXML == 0 */

502 },	582 },

503 STANDARD_VTABLE(sb_)	583 STANDARD_VTABLE(sb_) NULL_VTABLE

504 };	584 };

505	585

506 #endif	586 #endif

507	587

508 static const struct normal_encoding ascii_encoding = {	588 static const struct normal_encoding ascii_encoding = {

509 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },	589 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },

510 {	590 {

511 #define BT_COLON BT_NMSTRT	591 #define BT_COLON BT_NMSTRT

512 #include "asciitab.h"	592 #include "asciitab.h"

513 #undef BT_COLON	593 #undef BT_COLON

514 /* BT_NONXML == 0 */	594 /* BT_NONXML == 0 */

515 },	595 },

516 STANDARD_VTABLE(sb_)	596 STANDARD_VTABLE(sb_) NULL_VTABLE

517 };	597 };

518	598

519 static int PTRFASTCALL	599 static int PTRFASTCALL

520 unicode_byte_type(char hi, char lo)	600 unicode_byte_type(char hi, char lo)

521 {	601 {

522 switch ((unsigned char)hi) {	602 switch ((unsigned char)hi) {

523 case 0xD8: case 0xD9: case 0xDA: case 0xDB:	603 case 0xD8: case 0xD9: case 0xDA: case 0xDB:

524 return BT_LEAD4;	604 return BT_LEAD4;

525 case 0xDC: case 0xDD: case 0xDE: case 0xDF:	605 case 0xDC: case 0xDD: case 0xDE: case 0xDF:

526 return BT_TRAIL;	606 return BT_TRAIL;

527 case 0xFF:	607 case 0xFF:

528 switch ((unsigned char)lo) {	608 switch ((unsigned char)lo) {

529 case 0xFF:	609 case 0xFF:

530 case 0xFE:	610 case 0xFE:

531 return BT_NONXML;	611 return BT_NONXML;

532 }	612 }

533 break;	613 break;

534 }	614 }

535 return BT_NONASCII;	615 return BT_NONASCII;

536 }	616 }

537	617

538 #define DEFINE_UTF16_TO_UTF8(E) \	618 #define DEFINE_UTF16_TO_UTF8(E) \

539 static void PTRCALL \	619 static enum XML_Convert_Result PTRCALL \

540 E ## toUtf8(const ENCODING *enc, \	620 E ## toUtf8(const ENCODING *UNUSED_P(enc), \

541 const char *fromP, const char fromLim, \	621 const char *fromP, const char fromLim, \

542 char *toP, const char toLim) \	622 char *toP, const char toLim) \

543 { \	623 { \

544 const char *from; \	624 const char from = fromP; \

545 for (from = *fromP; from != fromLim; from += 2) { \	625 fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \

	626 for (; from < fromLim; from += 2) { \

546 int plane; \	627 int plane; \

547 unsigned char lo2; \	628 unsigned char lo2; \

548 unsigned char lo = GET_LO(from); \	629 unsigned char lo = GET_LO(from); \

549 unsigned char hi = GET_HI(from); \	630 unsigned char hi = GET_HI(from); \

550 switch (hi) { \	631 switch (hi) { \

551 case 0: \	632 case 0: \

552 if (lo < 0x80) { \	633 if (lo < 0x80) { \

553 if (*toP == toLim) { \	634 if (*toP == toLim) { \

554 *fromP = from; \	635 *fromP = from; \

555 return; \	636 return XML_CONVERT_OUTPUT_EXHAUSTED; \

556 } \	637 } \

557 (toP)++ = lo; \	638 (toP)++ = lo; \

558 break; \	639 break; \

559 } \	640 } \

560 /* fall through */ \	641 /* fall through */ \

561 case 0x1: case 0x2: case 0x3: \	642 case 0x1: case 0x2: case 0x3: \

562 case 0x4: case 0x5: case 0x6: case 0x7: \	643 case 0x4: case 0x5: case 0x6: case 0x7: \

563 if (toLim - *toP < 2) { \	644 if (toLim - *toP < 2) { \

564 *fromP = from; \	645 *fromP = from; \

565 return; \	646 return XML_CONVERT_OUTPUT_EXHAUSTED; \

566 } \	647 } \

567 (toP)++ = ((lo >> 6) \| (hi << 2) \| UTF8_cval2); \	648 (toP)++ = ((lo >> 6) \| (hi << 2) \| UTF8_cval2); \

568 (toP)++ = ((lo & 0x3f) \| 0x80); \	649 (toP)++ = ((lo & 0x3f) \| 0x80); \

569 break; \	650 break; \

570 default: \	651 default: \

571 if (toLim - *toP < 3) { \	652 if (toLim - *toP < 3) { \

572 *fromP = from; \	653 *fromP = from; \

573 return; \	654 return XML_CONVERT_OUTPUT_EXHAUSTED; \

574 } \	655 } \

575 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \	656 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \

576 (toP)++ = ((hi >> 4) \| UTF8_cval3); \	657 (toP)++ = ((hi >> 4) \| UTF8_cval3); \

577 (toP)++ = (((hi & 0xf) << 2) \| (lo >> 6) \| 0x80); \	658 (toP)++ = (((hi & 0xf) << 2) \| (lo >> 6) \| 0x80); \

578 (toP)++ = ((lo & 0x3f) \| 0x80); \	659 (toP)++ = ((lo & 0x3f) \| 0x80); \

579 break; \	660 break; \

580 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \	661 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \

581 if (toLim - *toP < 4) { \	662 if (toLim - *toP < 4) { \

582 *fromP = from; \	663 *fromP = from; \

583 return; \	664 return XML_CONVERT_OUTPUT_EXHAUSTED; \

	665 } \

	666 if (fromLim - from < 4) { \

	667 *fromP = from; \

	668 return XML_CONVERT_INPUT_INCOMPLETE; \

584 } \	669 } \

585 plane = (((hi & 0x3) << 2) \| ((lo >> 6) & 0x3)) + 1; \	670 plane = (((hi & 0x3) << 2) \| ((lo >> 6) & 0x3)) + 1; \

586 (toP)++ = ((plane >> 2) \| UTF8_cval4); \	671 (toP)++ = ((plane >> 2) \| UTF8_cval4); \

587 (toP)++ = (((lo >> 2) & 0xF) \| ((plane & 0x3) << 4) \| 0x80); \	672 (toP)++ = (((lo >> 2) & 0xF) \| ((plane & 0x3) << 4) \| 0x80); \

588 from += 2; \	673 from += 2; \

589 lo2 = GET_LO(from); \	674 lo2 = GET_LO(from); \

590 (toP)++ = (((lo & 0x3) << 4) \	675 (toP)++ = (((lo & 0x3) << 4) \

591 \| ((GET_HI(from) & 0x3) << 2) \	676 \| ((GET_HI(from) & 0x3) << 2) \

592 \| (lo2 >> 6) \	677 \| (lo2 >> 6) \

593 \| 0x80); \	678 \| 0x80); \

594 (toP)++ = ((lo2 & 0x3f) \| 0x80); \	679 (toP)++ = ((lo2 & 0x3f) \| 0x80); \

595 break; \	680 break; \

596 } \	681 } \

597 } \	682 } \

598 *fromP = from; \	683 *fromP = from; \

	684 if (from < fromLim) \

	685 return XML_CONVERT_INPUT_INCOMPLETE; \

	686 else \

	687 return XML_CONVERT_COMPLETED; \

599 }	688 }

600	689

601 #define DEFINE_UTF16_TO_UTF16(E) \	690 #define DEFINE_UTF16_TO_UTF16(E) \

602 static void PTRCALL \	691 static enum XML_Convert_Result PTRCALL \

603 E ## toUtf16(const ENCODING *enc, \	692 E ## toUtf16(const ENCODING *UNUSED_P(enc), \

604 const char *fromP, const char fromLim, \	693 const char *fromP, const char fromLim, \

605 unsigned short *toP, const unsigned short toLim) \	694 unsigned short *toP, const unsigned short toLim) \

606 { \	695 { \

	696 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \

	697 fromLim = fromP + (((fromLim - fromP) >> 1) << 1); /* shrink to even */ \

607 /* Avoid copying first half only of surrogate */ \	698 /* Avoid copying first half only of surrogate */ \

608 if (fromLim - fromP > ((toLim - toP) << 1) \	699 if (fromLim - fromP > ((toLim - toP) << 1) \

609 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \	700 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \

610 fromLim -= 2; \	701 fromLim -= 2; \

611 for (; fromP != fromLim && toP != toLim; *fromP += 2) \	702 res = XML_CONVERT_INPUT_INCOMPLETE; \

	703 } \

	704 for (; fromP < fromLim && toP < toLim; *fromP += 2) \

612 (toP)++ = (GET_HI(fromP) << 8) \| GET_LO(fromP); \	705 (toP)++ = (GET_HI(fromP) << 8) \| GET_LO(fromP); \

	706 if ((toP == toLim) && (fromP < fromLim)) \

	707 return XML_CONVERT_OUTPUT_EXHAUSTED; \

	708 else \

	709 return res; \

613 }	710 }

614	711

615 #define SET2(ptr, ch) \	712 #define SET2(ptr, ch) \

616 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))	713 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))

617 #define GET_LO(ptr) ((unsigned char)(ptr)[0])	714 #define GET_LO(ptr) ((unsigned char)(ptr)[0])

618 #define GET_HI(ptr) ((unsigned char)(ptr)[1])	715 #define GET_HI(ptr) ((unsigned char)(ptr)[1])

619	716

620 DEFINE_UTF16_TO_UTF8(little2_)	717 DEFINE_UTF16_TO_UTF8(little2_)

621 DEFINE_UTF16_TO_UTF16(little2_)	718 DEFINE_UTF16_TO_UTF16(little2_)

622	719

(...skipping 96 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
719 #if BYTEORDER == 1234	816 #if BYTEORDER == 1234

720 1	817 1

721 #else	818 #else

722 0	819 0

723 #endif	820 #endif

724 },	821 },

725 {	822 {

726 #include "asciitab.h"	823 #include "asciitab.h"

727 #include "latin1tab.h"	824 #include "latin1tab.h"

728 },	825 },

729 STANDARD_VTABLE(little2_)	826 STANDARD_VTABLE(little2_) NULL_VTABLE

730 };	827 };

731	828

732 #endif	829 #endif

733	830

734 static const struct normal_encoding little2_encoding = {	831 static const struct normal_encoding little2_encoding = {

735 { VTABLE, 2, 0,	832 { VTABLE, 2, 0,

736 #if BYTEORDER == 1234	833 #if BYTEORDER == 1234

737 1	834 1

738 #else	835 #else

739 0	836 0

740 #endif	837 #endif

741 },	838 },

742 {	839 {

743 #define BT_COLON BT_NMSTRT	840 #define BT_COLON BT_NMSTRT

744 #include "asciitab.h"	841 #include "asciitab.h"

745 #undef BT_COLON	842 #undef BT_COLON

746 #include "latin1tab.h"	843 #include "latin1tab.h"

747 },	844 },

748 STANDARD_VTABLE(little2_)	845 STANDARD_VTABLE(little2_) NULL_VTABLE

749 };	846 };

750	847

751 #if BYTEORDER != 4321	848 #if BYTEORDER != 4321

752	849

753 #ifdef XML_NS	850 #ifdef XML_NS

754	851

755 static const struct normal_encoding internal_little2_encoding_ns = {	852 static const struct normal_encoding internal_little2_encoding_ns = {

756 { VTABLE, 2, 0, 1 },	853 { VTABLE, 2, 0, 1 },

757 {	854 {

758 #include "iasciitab.h"	855 #include "iasciitab.h"

759 #include "latin1tab.h"	856 #include "latin1tab.h"

760 },	857 },

761 STANDARD_VTABLE(little2_)	858 STANDARD_VTABLE(little2_) NULL_VTABLE

762 };	859 };

763	860

764 #endif	861 #endif

765	862

766 static const struct normal_encoding internal_little2_encoding = {	863 static const struct normal_encoding internal_little2_encoding = {

767 { VTABLE, 2, 0, 1 },	864 { VTABLE, 2, 0, 1 },

768 {	865 {

769 #define BT_COLON BT_NMSTRT	866 #define BT_COLON BT_NMSTRT

770 #include "iasciitab.h"	867 #include "iasciitab.h"

771 #undef BT_COLON	868 #undef BT_COLON

772 #include "latin1tab.h"	869 #include "latin1tab.h"

773 },	870 },

774 STANDARD_VTABLE(little2_)	871 STANDARD_VTABLE(little2_) NULL_VTABLE

775 };	872 };

776	873

777 #endif	874 #endif

778	875

779	876

780 #define BIG2_BYTE_TYPE(enc, p) \	877 #define BIG2_BYTE_TYPE(enc, p) \

781 ((p)[0] == 0 \	878 ((p)[0] == 0 \

782 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \	879 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \

783 : unicode_byte_type((p)[0], (p)[1]))	880 : unicode_byte_type((p)[0], (p)[1]))

784 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)	881 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
860 #if BYTEORDER == 4321	957 #if BYTEORDER == 4321

861 1	958 1

862 #else	959 #else

863 0	960 0

864 #endif	961 #endif

865 },	962 },

866 {	963 {

867 #include "asciitab.h"	964 #include "asciitab.h"

868 #include "latin1tab.h"	965 #include "latin1tab.h"

869 },	966 },

870 STANDARD_VTABLE(big2_)	967 STANDARD_VTABLE(big2_) NULL_VTABLE

871 };	968 };

872	969

873 #endif	970 #endif

874	971

875 static const struct normal_encoding big2_encoding = {	972 static const struct normal_encoding big2_encoding = {

876 { VTABLE, 2, 0,	973 { VTABLE, 2, 0,

877 #if BYTEORDER == 4321	974 #if BYTEORDER == 4321

878 1	975 1

879 #else	976 #else

880 0	977 0

881 #endif	978 #endif

882 },	979 },

883 {	980 {

884 #define BT_COLON BT_NMSTRT	981 #define BT_COLON BT_NMSTRT

885 #include "asciitab.h"	982 #include "asciitab.h"

886 #undef BT_COLON	983 #undef BT_COLON

887 #include "latin1tab.h"	984 #include "latin1tab.h"

888 },	985 },

889 STANDARD_VTABLE(big2_)	986 STANDARD_VTABLE(big2_) NULL_VTABLE

890 };	987 };

891	988

892 #if BYTEORDER != 1234	989 #if BYTEORDER != 1234

893	990

894 #ifdef XML_NS	991 #ifdef XML_NS

895	992

896 static const struct normal_encoding internal_big2_encoding_ns = {	993 static const struct normal_encoding internal_big2_encoding_ns = {

897 { VTABLE, 2, 0, 1 },	994 { VTABLE, 2, 0, 1 },

898 {	995 {

899 #include "iasciitab.h"	996 #include "iasciitab.h"

900 #include "latin1tab.h"	997 #include "latin1tab.h"

901 },	998 },

902 STANDARD_VTABLE(big2_)	999 STANDARD_VTABLE(big2_) NULL_VTABLE

903 };	1000 };

904	1001

905 #endif	1002 #endif

906	1003

907 static const struct normal_encoding internal_big2_encoding = {	1004 static const struct normal_encoding internal_big2_encoding = {

908 { VTABLE, 2, 0, 1 },	1005 { VTABLE, 2, 0, 1 },

909 {	1006 {

910 #define BT_COLON BT_NMSTRT	1007 #define BT_COLON BT_NMSTRT

911 #include "iasciitab.h"	1008 #include "iasciitab.h"

912 #undef BT_COLON	1009 #undef BT_COLON

913 #include "latin1tab.h"	1010 #include "latin1tab.h"

914 },	1011 },

915 STANDARD_VTABLE(big2_)	1012 STANDARD_VTABLE(big2_) NULL_VTABLE

916 };	1013 };

917	1014

918 #endif	1015 #endif

919	1016

920 #undef PREFIX	1017 #undef PREFIX

921	1018

922 static int FASTCALL	1019 static int FASTCALL

923 streqci(const char *s1, const char *s2)	1020 streqci(const char *s1, const char *s2)

924 {	1021 {

925 for (;;) {	1022 for (;;) {

926 char c1 = *s1++;	1023 char c1 = *s1++;

927 char c2 = *s2++;	1024 char c2 = *s2++;

928 if (ASCII_a <= c1 && c1 <= ASCII_z)	1025 if (ASCII_a <= c1 && c1 <= ASCII_z)

929 c1 += ASCII_A - ASCII_a;	1026 c1 += ASCII_A - ASCII_a;

930 if (ASCII_a <= c2 && c2 <= ASCII_z)	1027 if (ASCII_a <= c2 && c2 <= ASCII_z)

931 c2 += ASCII_A - ASCII_a;	1028 c2 += ASCII_A - ASCII_a;

932 if (c1 != c2)	1029 if (c1 != c2)

933 return 0;	1030 return 0;

934 if (!c1)	1031 if (!c1)

935 break;	1032 break;

936 }	1033 }

937 return 1;	1034 return 1;

938 }	1035 }

939	1036

940 static void PTRCALL	1037 static void PTRCALL

941 initUpdatePosition(const ENCODING *enc, const char *ptr,	1038 initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,

942 const char *end, POSITION *pos)	1039 const char *end, POSITION *pos)

943 {	1040 {

944 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);	1041 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);

945 }	1042 }

946	1043

947 static int	1044 static int

948 toAscii(const ENCODING *enc, const char *ptr, const char *end)	1045 toAscii(const ENCODING *enc, const char *ptr, const char *end)

949 {	1046 {

950 char buf[1];	1047 char buf[1];

951 char *p = buf;	1048 char *p = buf;

(...skipping 329 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1281 }	1378 }

1282	1379

1283 static int PTRFASTCALL	1380 static int PTRFASTCALL

1284 unknown_isInvalid(const ENCODING *enc, const char *p)	1381 unknown_isInvalid(const ENCODING *enc, const char *p)

1285 {	1382 {

1286 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);	1383 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);

1287 int c = uenc->convert(uenc->userData, p);	1384 int c = uenc->convert(uenc->userData, p);

1288 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;	1385 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;

1289 }	1386 }

1290	1387

1291 static void PTRCALL	1388 static enum XML_Convert_Result PTRCALL

1292 unknown_toUtf8(const ENCODING *enc,	1389 unknown_toUtf8(const ENCODING *enc,

1293 const char **fromP, const char *fromLim,	1390 const char **fromP, const char *fromLim,

1294 char **toP, const char *toLim)	1391 char **toP, const char *toLim)

1295 {	1392 {

1296 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);	1393 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);

1297 char buf[XML_UTF8_ENCODE_MAX];	1394 char buf[XML_UTF8_ENCODE_MAX];

1298 for (;;) {	1395 for (;;) {

1299 const char *utf8;	1396 const char *utf8;

1300 int n;	1397 int n;

1301 if (*fromP == fromLim)	1398 if (*fromP == fromLim)

1302 break;	1399 return XML_CONVERT_COMPLETED;

1303 utf8 = uenc->utf8[(unsigned char)**fromP];	1400 utf8 = uenc->utf8[(unsigned char)**fromP];

1304 n = *utf8++;	1401 n = *utf8++;

1305 if (n == 0) {	1402 if (n == 0) {

1306 int c = uenc->convert(uenc->userData, *fromP);	1403 int c = uenc->convert(uenc->userData, *fromP);

1307 n = XmlUtf8Encode(c, buf);	1404 n = XmlUtf8Encode(c, buf);

1308 if (n > toLim - *toP)	1405 if (n > toLim - *toP)

1309 break;	1406 return XML_CONVERT_OUTPUT_EXHAUSTED;

1310 utf8 = buf;	1407 utf8 = buf;

1311 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]	1408 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]

1312 - (BT_LEAD2 - 2));	1409 - (BT_LEAD2 - 2));

1313 }	1410 }

1314 else {	1411 else {

1315 if (n > toLim - *toP)	1412 if (n > toLim - *toP)

1316 break;	1413 return XML_CONVERT_OUTPUT_EXHAUSTED;

1317 (*fromP)++;	1414 (*fromP)++;

1318 }	1415 }

1319 do {	1416 do {

1320 *(*toP)++ = *utf8++;	1417 *(*toP)++ = *utf8++;

1321 } while (--n != 0);	1418 } while (--n != 0);

1322 }	1419 }

1323 }	1420 }

1324	1421

1325 static void PTRCALL	1422 static enum XML_Convert_Result PTRCALL

1326 unknown_toUtf16(const ENCODING *enc,	1423 unknown_toUtf16(const ENCODING *enc,

1327 const char **fromP, const char *fromLim,	1424 const char **fromP, const char *fromLim,

1328 unsigned short **toP, const unsigned short *toLim)	1425 unsigned short **toP, const unsigned short *toLim)

1329 {	1426 {

1330 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);	1427 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);

1331 while (*fromP != fromLim && *toP != toLim) {	1428 while (*fromP < fromLim && *toP < toLim) {

1332 unsigned short c = uenc->utf16[(unsigned char)**fromP];	1429 unsigned short c = uenc->utf16[(unsigned char)**fromP];

1333 if (c == 0) {	1430 if (c == 0) {

1334 c = (unsigned short)	1431 c = (unsigned short)

1335 uenc->convert(uenc->userData, *fromP);	1432 uenc->convert(uenc->userData, *fromP);

1336 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]	1433 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]

1337 - (BT_LEAD2 - 2));	1434 - (BT_LEAD2 - 2));

1338 }	1435 }

1339 else	1436 else

1340 (*fromP)++;	1437 (*fromP)++;

1341 *(*toP)++ = c;	1438 *(*toP)++ = c;

1342 }	1439 }

	1440

	1441 if ((*toP == toLim) && (*fromP < fromLim))

	1442 return XML_CONVERT_OUTPUT_EXHAUSTED;

	1443 else

	1444 return XML_CONVERT_COMPLETED;

1343 }	1445 }

1344	1446

1345 ENCODING *	1447 ENCODING *

1346 XmlInitUnknownEncoding(void *mem,	1448 XmlInitUnknownEncoding(void *mem,

1347 int *table,	1449 int *table,

1348 CONVERTER convert,	1450 CONVERTER convert,

1349 void *userData)	1451 void *userData)

1350 {	1452 {

1351 int i;	1453 int i;

1352 struct unknown_encoding *e = (struct unknown_encoding *)mem;	1454 struct unknown_encoding *e = (struct unknown_encoding *)mem;

(...skipping 143 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1496 static int	1598 static int

1497 initScan(const ENCODING * const *encodingTable,	1599 initScan(const ENCODING * const *encodingTable,

1498 const INIT_ENCODING *enc,	1600 const INIT_ENCODING *enc,

1499 int state,	1601 int state,

1500 const char *ptr,	1602 const char *ptr,

1501 const char *end,	1603 const char *end,

1502 const char **nextTokPtr)	1604 const char **nextTokPtr)

1503 {	1605 {

1504 const ENCODING **encPtr;	1606 const ENCODING **encPtr;

1505	1607

1506 if (ptr == end)	1608 if (ptr >= end)

1507 return XML_TOK_NONE;	1609 return XML_TOK_NONE;

1508 encPtr = enc->encPtr;	1610 encPtr = enc->encPtr;

1509 if (ptr + 1 == end) {	1611 if (ptr + 1 == end) {

1510 /* only a single byte available for auto-detection */	1612 /* only a single byte available for auto-detection */

1511 #ifndef XML_DTD /* FIXME */	1613 #ifndef XML_DTD /* FIXME */

1512 /* a well-formed document entity must have more than one byte */	1614 /* a well-formed document entity must have more than one byte */

1513 if (state != XML_CONTENT_STATE)	1615 if (state != XML_CONTENT_STATE)

1514 return XML_TOK_PARTIAL;	1616 return XML_TOK_PARTIAL;

1515 #endif	1617 #endif

1516 /* so we're parsing an external text entity... */	1618 /* so we're parsing an external text entity... */

(...skipping 125 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1642 CONVERTER convert,	1744 CONVERTER convert,

1643 void *userData)	1745 void *userData)

1644 {	1746 {

1645 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);	1747 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);

1646 if (enc)	1748 if (enc)

1647 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;	1749 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;

1648 return enc;	1750 return enc;

1649 }	1751 }

1650	1752

1651 #endif /* XML_NS */	1753 #endif /* XML_NS */

OLD	NEW

« no previous file with comments | « third_party/expat/files/lib/xmltok.h ('k') | third_party/expat/files/lib/xmltok.c.origin » ('j') | no next file with comments »