third_party/expat/files/lib/xmltok.c.origin - Issue 2761253002: Update expat to 2.2.0 to fix CVE vulnerability.

Side by Side Diff: third_party/expat/files/lib/xmltok.c.origin

Issue 2761253002: Update expat to 2.2.0 to fix CVE vulnerability. (Closed)

Patch Set: update README.chromium (created 3 years, 9 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd	1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd

2 See the file COPYING for copying permission.	2 See the file COPYING for copying permission.

3 */	3 */

4	4

5 #include <stddef.h>	5 #include <stddef.h>

6	6

7 #ifdef COMPILED_FROM_DSP	7 #ifdef WIN32

8 #include "winconfig.h"	8 #include "winconfig.h"

9 #elif defined(MACOS_CLASSIC)	9 #elif defined(MACOS_CLASSIC)

10 #include "macconfig.h"	10 #include "macconfig.h"

11 #elif defined(__amigaos__)	11 #elif defined(__amigaos__)

12 #include "amigaconfig.h"	12 #include "amigaconfig.h"

13 #elif defined(__WATCOMC__)	13 #elif defined(__WATCOMC__)

14 #include "watcomconfig.h"	14 #include "watcomconfig.h"

15 #else	15 #else

16 #ifdef HAVE_EXPAT_CONFIG_H	16 #ifdef HAVE_EXPAT_CONFIG_H

17 #include <expat_config.h>	17 #include <expat_config.h>

18 #endif	18 #endif

19 #endif /* ndef COMPILED_FROM_DSP */	19 #endif /* ndef WIN32 */

20	20

21 #include "expat_external.h"	21 #include "expat_external.h"

22 #include "internal.h"	22 #include "internal.h"

23 #include "xmltok.h"	23 #include "xmltok.h"

24 #include "nametab.h"	24 #include "nametab.h"

25	25

26 #ifdef XML_DTD	26 #ifdef XML_DTD

27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)	27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)

28 #else	28 #else

29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */	29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */

30 #endif	30 #endif

31	31

32 #define VTABLE1 \	32 #define VTABLE1 \

33 { PREFIX(prologTok), PREFIX(contentTok), \	33 { PREFIX(prologTok), PREFIX(contentTok), \

34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \	34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \

35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \	35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \

36 PREFIX(sameName), \	36 PREFIX(sameName), \

37 PREFIX(nameMatchesAscii), \	37 PREFIX(nameMatchesAscii), \

38 PREFIX(nameLength), \	38 PREFIX(nameLength), \

39 PREFIX(skipS), \	39 PREFIX(skipS), \

40 PREFIX(getAtts), \	40 PREFIX(getAtts), \

41 PREFIX(charRefNumber), \	41 PREFIX(charRefNumber), \

42 PREFIX(predefinedEntityName), \	42 PREFIX(predefinedEntityName), \

43 PREFIX(updatePosition), \	43 PREFIX(updatePosition), \

44 PREFIX(isPublicId)	44 PREFIX(isPublicId)

45	45

46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)	46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)

47	47

48 #define UCS2_GET_NAMING(pages, hi, lo) \	48 #define UCS2_GET_NAMING(pages, hi, lo) \

49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))	49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))

50	50

51 /* A 2 byte UTF-8 representation splits the characters 11 bits between	51 /* A 2 byte UTF-8 representation splits the characters 11 bits between

52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into	52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into

53 pages, 3 bits to add to that index and 5 bits to generate the mask.	53 pages, 3 bits to add to that index and 5 bits to generate the mask.

54 */	54 */

55 #define UTF8_GET_NAMING2(pages, byte) \	55 #define UTF8_GET_NAMING2(pages, byte) \

56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \	56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \

57 + ((((byte)[0]) & 3) << 1) \	57 + ((((byte)[0]) & 3) << 1) \

58 + ((((byte)[1]) >> 5) & 1)] \	58 + ((((byte)[1]) >> 5) & 1)] \

59 & (1 << (((byte)[1]) & 0x1F)))	59 & (1u << (((byte)[1]) & 0x1F)))

60	60

61 /* A 3 byte UTF-8 representation splits the characters 16 bits between	61 /* A 3 byte UTF-8 representation splits the characters 16 bits between

62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index	62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index

63 into pages, 3 bits to add to that index and 5 bits to generate the	63 into pages, 3 bits to add to that index and 5 bits to generate the

64 mask.	64 mask.

65 */	65 */

66 #define UTF8_GET_NAMING3(pages, byte) \	66 #define UTF8_GET_NAMING3(pages, byte) \

67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \	67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \

68 + ((((byte)[1]) >> 2) & 0xF)] \	68 + ((((byte)[1]) >> 2) & 0xF)] \

69 << 3) \	69 << 3) \

70 + ((((byte)[1]) & 3) << 1) \	70 + ((((byte)[1]) & 3) << 1) \

71 + ((((byte)[2]) >> 5) & 1)] \	71 + ((((byte)[2]) >> 5) & 1)] \

72 & (1 << (((byte)[2]) & 0x1F)))	72 & (1u << (((byte)[2]) & 0x1F)))

73	73

74 #define UTF8_GET_NAMING(pages, p, n) \	74 #define UTF8_GET_NAMING(pages, p, n) \

75 ((n) == 2 \	75 ((n) == 2 \

76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \	76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \

77 : ((n) == 3 \	77 : ((n) == 3 \

78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \	78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \

79 : 0))	79 : 0))

80	80

81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B	81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B

82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/	82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
115 \|\| \	115 \|\| \

116 ((*p) == 0xF0 \	116 ((*p) == 0xF0 \

117 ? \	117 ? \

118 (p)[1] < 0x90 \|\| ((p)[1] & 0xC0) == 0xC0 \	118 (p)[1] < 0x90 \|\| ((p)[1] & 0xC0) == 0xC0 \

119 : \	119 : \

120 ((p)[1] & 0x80) == 0 \	120 ((p)[1] & 0x80) == 0 \

121 \|\| \	121 \|\| \

122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))	122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))

123	123

124 static int PTRFASTCALL	124 static int PTRFASTCALL

125 isNever(const ENCODING enc, const char p)	125 isNever(const ENCODING UNUSED_P(enc), const char UNUSED_P(p))

126 {	126 {

127 return 0;	127 return 0;

128 }	128 }

129	129

130 static int PTRFASTCALL	130 static int PTRFASTCALL

131 utf8_isName2(const ENCODING enc, const char p)	131 utf8_isName2(const ENCODING UNUSED_P(enc), const char p)

132 {	132 {

133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);	133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);

134 }	134 }

135	135

136 static int PTRFASTCALL	136 static int PTRFASTCALL

137 utf8_isName3(const ENCODING enc, const char p)	137 utf8_isName3(const ENCODING UNUSED_P(enc), const char p)

138 {	138 {

139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);	139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);

140 }	140 }

141	141

142 #define utf8_isName4 isNever	142 #define utf8_isName4 isNever

143	143

144 static int PTRFASTCALL	144 static int PTRFASTCALL

145 utf8_isNmstrt2(const ENCODING enc, const char p)	145 utf8_isNmstrt2(const ENCODING UNUSED_P(enc), const char p)

146 {	146 {

147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);	147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);

148 }	148 }

149	149

150 static int PTRFASTCALL	150 static int PTRFASTCALL

151 utf8_isNmstrt3(const ENCODING enc, const char p)	151 utf8_isNmstrt3(const ENCODING UNUSED_P(enc), const char p)

152 {	152 {

153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);	153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);

154 }	154 }

155	155

156 #define utf8_isNmstrt4 isNever	156 #define utf8_isNmstrt4 isNever

157	157

158 static int PTRFASTCALL	158 static int PTRFASTCALL

159 utf8_isInvalid2(const ENCODING enc, const char p)	159 utf8_isInvalid2(const ENCODING UNUSED_P(enc), const char p)

160 {	160 {

161 return UTF8_INVALID2((const unsigned char *)p);	161 return UTF8_INVALID2((const unsigned char *)p);

162 }	162 }

163	163

164 static int PTRFASTCALL	164 static int PTRFASTCALL

165 utf8_isInvalid3(const ENCODING enc, const char p)	165 utf8_isInvalid3(const ENCODING UNUSED_P(enc), const char p)

166 {	166 {

167 return UTF8_INVALID3((const unsigned char *)p);	167 return UTF8_INVALID3((const unsigned char *)p);

168 }	168 }

169	169

170 static int PTRFASTCALL	170 static int PTRFASTCALL

171 utf8_isInvalid4(const ENCODING enc, const char p)	171 utf8_isInvalid4(const ENCODING UNUSED_P(enc), const char p)

172 {	172 {

173 return UTF8_INVALID4((const unsigned char *)p);	173 return UTF8_INVALID4((const unsigned char *)p);

174 }	174 }

175	175

176 struct normal_encoding {	176 struct normal_encoding {

177 ENCODING enc;	177 ENCODING enc;

178 unsigned char type[256];	178 unsigned char type[256];

179 #ifdef XML_MIN_SIZE	179 #ifdef XML_MIN_SIZE

180 int (PTRFASTCALL byteType)(const ENCODING , const char *);	180 int (PTRFASTCALL byteType)(const ENCODING , const char *);

181 int (PTRFASTCALL isNameMin)(const ENCODING , const char *);	181 int (PTRFASTCALL isNameMin)(const ENCODING , const char *);

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
215 E ## isName2, \	215 E ## isName2, \

216 E ## isName3, \	216 E ## isName3, \

217 E ## isName4, \	217 E ## isName4, \

218 E ## isNmstrt2, \	218 E ## isNmstrt2, \

219 E ## isNmstrt3, \	219 E ## isNmstrt3, \

220 E ## isNmstrt4, \	220 E ## isNmstrt4, \

221 E ## isInvalid2, \	221 E ## isInvalid2, \

222 E ## isInvalid3, \	222 E ## isInvalid3, \

223 E ## isInvalid4	223 E ## isInvalid4

224	224

	225 #define NULL_VTABLE \

	226 /* isName2 */ NULL, \

	227 /* isName3 */ NULL, \

	228 /* isName4 */ NULL, \

	229 /* isNmstrt2 */ NULL, \

	230 /* isNmstrt3 */ NULL, \

	231 /* isNmstrt4 */ NULL, \

	232 /* isInvalid2 */ NULL, \

	233 /* isInvalid3 */ NULL, \

	234 /* isInvalid4 */ NULL

	235

225 static int FASTCALL checkCharRefNumber(int);	236 static int FASTCALL checkCharRefNumber(int);

226	237

227 #include "xmltok_impl.h"	238 #include "xmltok_impl.h"

228 #include "ascii.h"	239 #include "ascii.h"

229	240

230 #ifdef XML_MIN_SIZE	241 #ifdef XML_MIN_SIZE

231 #define sb_isNameMin isNever	242 #define sb_isNameMin isNever

232 #define sb_isNmstrtMin isNever	243 #define sb_isNmstrtMin isNever

233 #endif	244 #endif

234	245

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
311 #undef IS_NMSTRT_CHAR_MINBPC	322 #undef IS_NMSTRT_CHAR_MINBPC

312 #undef IS_INVALID_CHAR	323 #undef IS_INVALID_CHAR

313	324

314 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */	325 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */

315 UTF8_cval1 = 0x00,	326 UTF8_cval1 = 0x00,

316 UTF8_cval2 = 0xc0,	327 UTF8_cval2 = 0xc0,

317 UTF8_cval3 = 0xe0,	328 UTF8_cval3 = 0xe0,

318 UTF8_cval4 = 0xf0	329 UTF8_cval4 = 0xf0

319 };	330 };

320	331

321 static void PTRCALL	332 void

322 utf8_toUtf8(const ENCODING *enc,	333 align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)

	334 {

	335 const char * fromLim = *fromLimRef;

	336 size_t walked = 0;

	337 for (; fromLim > from; fromLim--, walked++) {

	338 const unsigned char prev = (unsigned char)fromLim[-1];

	339 if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */

	340 if (walked + 1 >= 4) {

	341 fromLim += 4 - 1;

	342 break;

	343 } else {

	344 walked = 0;

	345 }

	346 } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */

	347 if (walked + 1 >= 3) {

	348 fromLim += 3 - 1;

	349 break;

	350 } else {

	351 walked = 0;

	352 }

	353 } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */

	354 if (walked + 1 >= 2) {

	355 fromLim += 2 - 1;

	356 break;

	357 } else {

	358 walked = 0;

	359 }

	360 } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */

	361 break;

	362 }

	363 }

	364 *fromLimRef = fromLim;

	365 }

	366

	367 static enum XML_Convert_Result PTRCALL

	368 utf8_toUtf8(const ENCODING *UNUSED_P(enc),

323 const char *fromP, const char fromLim,	369 const char *fromP, const char fromLim,

324 char *toP, const char toLim)	370 char *toP, const char toLim)

325 {	371 {

	372 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;

326 char *to;	373 char *to;

327 const char *from;	374 const char *from;

328 if (fromLim - fromP > toLim - toP) {	375 if (fromLim - fromP > toLim - toP) {

329 /* Avoid copying partial characters. */	376 /* Avoid copying partial characters. */

330 for (fromLim = fromP + (toLim - toP); fromLim > *fromP; fromLim--)	377 res = XML_CONVERT_OUTPUT_EXHAUSTED;

331 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)	378 fromLim = fromP + (toLim - toP);

332 break;	379 align_limit_to_full_utf8_characters(*fromP, &fromLim);

333 }	380 }

334 for (to = toP, from = fromP; from != fromLim; from++, to++)	381 for (to = toP, from = fromP; (from < fromLim) && (to < toLim); from++, to++)

335 to = from;	382 to = from;

336 *fromP = from;	383 *fromP = from;

337 *toP = to;	384 *toP = to;

	385

	386 if ((to == toLim) && (from < fromLim))

	387 return XML_CONVERT_OUTPUT_EXHAUSTED;

	388 else

	389 return res;

338 }	390 }

339	391

340 static void PTRCALL	392 static enum XML_Convert_Result PTRCALL

341 utf8_toUtf16(const ENCODING *enc,	393 utf8_toUtf16(const ENCODING *enc,

342 const char *fromP, const char fromLim,	394 const char *fromP, const char fromLim,

343 unsigned short *toP, const unsigned short toLim)	395 unsigned short *toP, const unsigned short toLim)

344 {	396 {

	397 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;

345 unsigned short to = toP;	398 unsigned short to = toP;

346 const char from = fromP;	399 const char from = fromP;

347 while (from != fromLim && to != toLim) {	400 while (from < fromLim && to < toLim) {

348 switch (((struct normal_encoding )enc)->type[(unsigned char)from]) {	401 switch (((struct normal_encoding )enc)->type[(unsigned char)from]) {

349 case BT_LEAD2:	402 case BT_LEAD2:

	403 if (fromLim - from < 2) {

	404 res = XML_CONVERT_INPUT_INCOMPLETE;

	405 break;

	406 }

350 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) \| (from[1] & 0x3f));	407 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) \| (from[1] & 0x3f));

351 from += 2;	408 from += 2;

352 break;	409 break;

353 case BT_LEAD3:	410 case BT_LEAD3:

	411 if (fromLim - from < 3) {

	412 res = XML_CONVERT_INPUT_INCOMPLETE;

	413 break;

	414 }

354 *to++ = (unsigned short)(((from[0] & 0xf) << 12)	415 *to++ = (unsigned short)(((from[0] & 0xf) << 12)

355 \| ((from[1] & 0x3f) << 6) \| (from[2] & 0x3f));	416 \| ((from[1] & 0x3f) << 6) \| (from[2] & 0x3f));

356 from += 3;	417 from += 3;

357 break;	418 break;

358 case BT_LEAD4:	419 case BT_LEAD4:

359 {	420 {

360 unsigned long n;	421 unsigned long n;

361 if (to + 1 == toLim)	422 if (toLim - to < 2) {

	423 res = XML_CONVERT_OUTPUT_EXHAUSTED;

362 goto after;	424 goto after;

	425 }

	426 if (fromLim - from < 4) {

	427 res = XML_CONVERT_INPUT_INCOMPLETE;

	428 goto after;

	429 }

363 n = ((from[0] & 0x7) << 18) \| ((from[1] & 0x3f) << 12)	430 n = ((from[0] & 0x7) << 18) \| ((from[1] & 0x3f) << 12)

364 \| ((from[2] & 0x3f) << 6) \| (from[3] & 0x3f);	431 \| ((from[2] & 0x3f) << 6) \| (from[3] & 0x3f);

365 n -= 0x10000;	432 n -= 0x10000;

366 to[0] = (unsigned short)((n >> 10) \| 0xD800);	433 to[0] = (unsigned short)((n >> 10) \| 0xD800);

367 to[1] = (unsigned short)((n & 0x3FF) \| 0xDC00);	434 to[1] = (unsigned short)((n & 0x3FF) \| 0xDC00);

368 to += 2;	435 to += 2;

369 from += 4;	436 from += 4;

370 }	437 }

371 break;	438 break;

372 default:	439 default:

373 to++ = from++;	440 to++ = from++;

374 break;	441 break;

375 }	442 }

376 }	443 }

377 after:	444 after:

378 *fromP = from;	445 *fromP = from;

379 *toP = to;	446 *toP = to;

	447 return res;

380 }	448 }

381	449

382 #ifdef XML_NS	450 #ifdef XML_NS

383 static const struct normal_encoding utf8_encoding_ns = {	451 static const struct normal_encoding utf8_encoding_ns = {

384 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },	452 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },

385 {	453 {

386 #include "asciitab.h"	454 #include "asciitab.h"

387 #include "utf8tab.h"	455 #include "utf8tab.h"

388 },	456 },

389 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)	457 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)

(...skipping 28 matching lines...) Expand all Loading...
418 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },	486 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },

419 {	487 {

420 #define BT_COLON BT_NMSTRT	488 #define BT_COLON BT_NMSTRT

421 #include "iasciitab.h"	489 #include "iasciitab.h"

422 #undef BT_COLON	490 #undef BT_COLON

423 #include "utf8tab.h"	491 #include "utf8tab.h"

424 },	492 },

425 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)	493 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)

426 };	494 };

427	495

428 static void PTRCALL	496 static enum XML_Convert_Result PTRCALL

429 latin1_toUtf8(const ENCODING *enc,	497 latin1_toUtf8(const ENCODING *UNUSED_P(enc),

430 const char *fromP, const char fromLim,	498 const char *fromP, const char fromLim,

431 char *toP, const char toLim)	499 char *toP, const char toLim)

432 {	500 {

433 for (;;) {	501 for (;;) {

434 unsigned char c;	502 unsigned char c;

435 if (*fromP == fromLim)	503 if (*fromP == fromLim)

436 break;	504 return XML_CONVERT_COMPLETED;

437 c = (unsigned char)**fromP;	505 c = (unsigned char)**fromP;

438 if (c & 0x80) {	506 if (c & 0x80) {

439 if (toLim - *toP < 2)	507 if (toLim - *toP < 2)

440 break;	508 return XML_CONVERT_OUTPUT_EXHAUSTED;

441 (toP)++ = (char)((c >> 6) \| UTF8_cval2);	509 (toP)++ = (char)((c >> 6) \| UTF8_cval2);

442 (toP)++ = (char)((c & 0x3f) \| 0x80);	510 (toP)++ = (char)((c & 0x3f) \| 0x80);

443 (*fromP)++;	511 (*fromP)++;

444 }	512 }

445 else {	513 else {

446 if (*toP == toLim)	514 if (*toP == toLim)

447 break;	515 return XML_CONVERT_OUTPUT_EXHAUSTED;

448 (toP)++ = (fromP)++;	516 (toP)++ = (fromP)++;

449 }	517 }

450 }	518 }

451 }	519 }

452	520

453 static void PTRCALL	521 static enum XML_Convert_Result PTRCALL

454 latin1_toUtf16(const ENCODING *enc,	522 latin1_toUtf16(const ENCODING *UNUSED_P(enc),

455 const char *fromP, const char fromLim,	523 const char *fromP, const char fromLim,

456 unsigned short *toP, const unsigned short toLim)	524 unsigned short *toP, const unsigned short toLim)

457 {	525 {

458 while (fromP != fromLim && toP != toLim)	526 while (fromP < fromLim && toP < toLim)

459 (toP)++ = (unsigned char)(fromP)++;	527 (toP)++ = (unsigned char)(fromP)++;

	528

	529 if ((toP == toLim) && (fromP < fromLim))

	530 return XML_CONVERT_OUTPUT_EXHAUSTED;

	531 else

	532 return XML_CONVERT_COMPLETED;

460 }	533 }

461	534

462 #ifdef XML_NS	535 #ifdef XML_NS

463	536

464 static const struct normal_encoding latin1_encoding_ns = {	537 static const struct normal_encoding latin1_encoding_ns = {

465 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },	538 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },

466 {	539 {

467 #include "asciitab.h"	540 #include "asciitab.h"

468 #include "latin1tab.h"	541 #include "latin1tab.h"

469 },	542 },

470 STANDARD_VTABLE(sb_)	543 STANDARD_VTABLE(sb_) NULL_VTABLE

471 };	544 };

472	545

473 #endif	546 #endif

474	547

475 static const struct normal_encoding latin1_encoding = {	548 static const struct normal_encoding latin1_encoding = {

476 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },	549 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },

477 {	550 {

478 #define BT_COLON BT_NMSTRT	551 #define BT_COLON BT_NMSTRT

479 #include "asciitab.h"	552 #include "asciitab.h"

480 #undef BT_COLON	553 #undef BT_COLON

481 #include "latin1tab.h"	554 #include "latin1tab.h"

482 },	555 },

483 STANDARD_VTABLE(sb_)	556 STANDARD_VTABLE(sb_) NULL_VTABLE

484 };	557 };

485	558

486 static void PTRCALL	559 static enum XML_Convert_Result PTRCALL

487 ascii_toUtf8(const ENCODING *enc,	560 ascii_toUtf8(const ENCODING *UNUSED_P(enc),

488 const char *fromP, const char fromLim,	561 const char *fromP, const char fromLim,

489 char *toP, const char toLim)	562 char *toP, const char toLim)

490 {	563 {

491 while (fromP != fromLim && toP != toLim)	564 while (fromP < fromLim && toP < toLim)

492 (toP)++ = (fromP)++;	565 (toP)++ = (fromP)++;

	566

	567 if ((toP == toLim) && (fromP < fromLim))

	568 return XML_CONVERT_OUTPUT_EXHAUSTED;

	569 else

	570 return XML_CONVERT_COMPLETED;

493 }	571 }

494	572

495 #ifdef XML_NS	573 #ifdef XML_NS

496	574

497 static const struct normal_encoding ascii_encoding_ns = {	575 static const struct normal_encoding ascii_encoding_ns = {

498 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },	576 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },

499 {	577 {

500 #include "asciitab.h"	578 #include "asciitab.h"

501 /* BT_NONXML == 0 */	579 /* BT_NONXML == 0 */

502 },	580 },

503 STANDARD_VTABLE(sb_)	581 STANDARD_VTABLE(sb_) NULL_VTABLE

504 };	582 };

505	583

506 #endif	584 #endif

507	585

508 static const struct normal_encoding ascii_encoding = {	586 static const struct normal_encoding ascii_encoding = {

509 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },	587 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },

510 {	588 {

511 #define BT_COLON BT_NMSTRT	589 #define BT_COLON BT_NMSTRT

512 #include "asciitab.h"	590 #include "asciitab.h"

513 #undef BT_COLON	591 #undef BT_COLON

514 /* BT_NONXML == 0 */	592 /* BT_NONXML == 0 */

515 },	593 },

516 STANDARD_VTABLE(sb_)	594 STANDARD_VTABLE(sb_) NULL_VTABLE

517 };	595 };

518	596

519 static int PTRFASTCALL	597 static int PTRFASTCALL

520 unicode_byte_type(char hi, char lo)	598 unicode_byte_type(char hi, char lo)

521 {	599 {

522 switch ((unsigned char)hi) {	600 switch ((unsigned char)hi) {

523 case 0xD8: case 0xD9: case 0xDA: case 0xDB:	601 case 0xD8: case 0xD9: case 0xDA: case 0xDB:

524 return BT_LEAD4;	602 return BT_LEAD4;

525 case 0xDC: case 0xDD: case 0xDE: case 0xDF:	603 case 0xDC: case 0xDD: case 0xDE: case 0xDF:

526 return BT_TRAIL;	604 return BT_TRAIL;

527 case 0xFF:	605 case 0xFF:

528 switch ((unsigned char)lo) {	606 switch ((unsigned char)lo) {

529 case 0xFF:	607 case 0xFF:

530 case 0xFE:	608 case 0xFE:

531 return BT_NONXML;	609 return BT_NONXML;

532 }	610 }

533 break;	611 break;

534 }	612 }

535 return BT_NONASCII;	613 return BT_NONASCII;

536 }	614 }

537	615

538 #define DEFINE_UTF16_TO_UTF8(E) \	616 #define DEFINE_UTF16_TO_UTF8(E) \

539 static void PTRCALL \	617 static enum XML_Convert_Result PTRCALL \

540 E ## toUtf8(const ENCODING *enc, \	618 E ## toUtf8(const ENCODING *UNUSED_P(enc), \

541 const char *fromP, const char fromLim, \	619 const char *fromP, const char fromLim, \

542 char *toP, const char toLim) \	620 char *toP, const char toLim) \

543 { \	621 { \

544 const char *from; \	622 const char from = fromP; \

545 for (from = *fromP; from != fromLim; from += 2) { \	623 fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \

	624 for (; from < fromLim; from += 2) { \

546 int plane; \	625 int plane; \

547 unsigned char lo2; \	626 unsigned char lo2; \

548 unsigned char lo = GET_LO(from); \	627 unsigned char lo = GET_LO(from); \

549 unsigned char hi = GET_HI(from); \	628 unsigned char hi = GET_HI(from); \

550 switch (hi) { \	629 switch (hi) { \

551 case 0: \	630 case 0: \

552 if (lo < 0x80) { \	631 if (lo < 0x80) { \

553 if (*toP == toLim) { \	632 if (*toP == toLim) { \

554 *fromP = from; \	633 *fromP = from; \

555 return; \	634 return XML_CONVERT_OUTPUT_EXHAUSTED; \

556 } \	635 } \

557 (toP)++ = lo; \	636 (toP)++ = lo; \

558 break; \	637 break; \

559 } \	638 } \

560 /* fall through */ \	639 /* fall through */ \

561 case 0x1: case 0x2: case 0x3: \	640 case 0x1: case 0x2: case 0x3: \

562 case 0x4: case 0x5: case 0x6: case 0x7: \	641 case 0x4: case 0x5: case 0x6: case 0x7: \

563 if (toLim - *toP < 2) { \	642 if (toLim - *toP < 2) { \

564 *fromP = from; \	643 *fromP = from; \

565 return; \	644 return XML_CONVERT_OUTPUT_EXHAUSTED; \

566 } \	645 } \

567 (toP)++ = ((lo >> 6) \| (hi << 2) \| UTF8_cval2); \	646 (toP)++ = ((lo >> 6) \| (hi << 2) \| UTF8_cval2); \

568 (toP)++ = ((lo & 0x3f) \| 0x80); \	647 (toP)++ = ((lo & 0x3f) \| 0x80); \

569 break; \	648 break; \

570 default: \	649 default: \

571 if (toLim - *toP < 3) { \	650 if (toLim - *toP < 3) { \

572 *fromP = from; \	651 *fromP = from; \

573 return; \	652 return XML_CONVERT_OUTPUT_EXHAUSTED; \

574 } \	653 } \

575 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \	654 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \

576 (toP)++ = ((hi >> 4) \| UTF8_cval3); \	655 (toP)++ = ((hi >> 4) \| UTF8_cval3); \

577 (toP)++ = (((hi & 0xf) << 2) \| (lo >> 6) \| 0x80); \	656 (toP)++ = (((hi & 0xf) << 2) \| (lo >> 6) \| 0x80); \

578 (toP)++ = ((lo & 0x3f) \| 0x80); \	657 (toP)++ = ((lo & 0x3f) \| 0x80); \

579 break; \	658 break; \

580 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \	659 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \

581 if (toLim - *toP < 4) { \	660 if (toLim - *toP < 4) { \

582 *fromP = from; \	661 *fromP = from; \

583 return; \	662 return XML_CONVERT_OUTPUT_EXHAUSTED; \

	663 } \

	664 if (fromLim - from < 4) { \

	665 *fromP = from; \

	666 return XML_CONVERT_INPUT_INCOMPLETE; \

584 } \	667 } \

585 plane = (((hi & 0x3) << 2) \| ((lo >> 6) & 0x3)) + 1; \	668 plane = (((hi & 0x3) << 2) \| ((lo >> 6) & 0x3)) + 1; \

586 (toP)++ = ((plane >> 2) \| UTF8_cval4); \	669 (toP)++ = ((plane >> 2) \| UTF8_cval4); \

587 (toP)++ = (((lo >> 2) & 0xF) \| ((plane & 0x3) << 4) \| 0x80); \	670 (toP)++ = (((lo >> 2) & 0xF) \| ((plane & 0x3) << 4) \| 0x80); \

588 from += 2; \	671 from += 2; \

589 lo2 = GET_LO(from); \	672 lo2 = GET_LO(from); \

590 (toP)++ = (((lo & 0x3) << 4) \	673 (toP)++ = (((lo & 0x3) << 4) \

591 \| ((GET_HI(from) & 0x3) << 2) \	674 \| ((GET_HI(from) & 0x3) << 2) \

592 \| (lo2 >> 6) \	675 \| (lo2 >> 6) \

593 \| 0x80); \	676 \| 0x80); \

594 (toP)++ = ((lo2 & 0x3f) \| 0x80); \	677 (toP)++ = ((lo2 & 0x3f) \| 0x80); \

595 break; \	678 break; \

596 } \	679 } \

597 } \	680 } \

598 *fromP = from; \	681 *fromP = from; \

	682 if (from < fromLim) \

	683 return XML_CONVERT_INPUT_INCOMPLETE; \

	684 else \

	685 return XML_CONVERT_COMPLETED; \

599 }	686 }

600	687

601 #define DEFINE_UTF16_TO_UTF16(E) \	688 #define DEFINE_UTF16_TO_UTF16(E) \

602 static void PTRCALL \	689 static enum XML_Convert_Result PTRCALL \

603 E ## toUtf16(const ENCODING *enc, \	690 E ## toUtf16(const ENCODING *UNUSED_P(enc), \

604 const char *fromP, const char fromLim, \	691 const char *fromP, const char fromLim, \

605 unsigned short *toP, const unsigned short toLim) \	692 unsigned short *toP, const unsigned short toLim) \

606 { \	693 { \

	694 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \

	695 fromLim = fromP + (((fromLim - fromP) >> 1) << 1); /* shrink to even */ \

607 /* Avoid copying first half only of surrogate */ \	696 /* Avoid copying first half only of surrogate */ \

608 if (fromLim - fromP > ((toLim - toP) << 1) \	697 if (fromLim - fromP > ((toLim - toP) << 1) \

609 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \	698 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \

610 fromLim -= 2; \	699 fromLim -= 2; \

611 for (; fromP != fromLim && toP != toLim; *fromP += 2) \	700 res = XML_CONVERT_INPUT_INCOMPLETE; \

	701 } \

	702 for (; fromP < fromLim && toP < toLim; *fromP += 2) \

612 (toP)++ = (GET_HI(fromP) << 8) \| GET_LO(fromP); \	703 (toP)++ = (GET_HI(fromP) << 8) \| GET_LO(fromP); \

	704 if ((toP == toLim) && (fromP < fromLim)) \

	705 return XML_CONVERT_OUTPUT_EXHAUSTED; \

	706 else \

	707 return res; \

613 }	708 }

614	709

615 #define SET2(ptr, ch) \	710 #define SET2(ptr, ch) \

616 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))	711 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))

617 #define GET_LO(ptr) ((unsigned char)(ptr)[0])	712 #define GET_LO(ptr) ((unsigned char)(ptr)[0])

618 #define GET_HI(ptr) ((unsigned char)(ptr)[1])	713 #define GET_HI(ptr) ((unsigned char)(ptr)[1])

619	714

620 DEFINE_UTF16_TO_UTF8(little2_)	715 DEFINE_UTF16_TO_UTF8(little2_)

621 DEFINE_UTF16_TO_UTF16(little2_)	716 DEFINE_UTF16_TO_UTF16(little2_)

622	717

(...skipping 96 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
719 #if BYTEORDER == 1234	814 #if BYTEORDER == 1234

720 1	815 1

721 #else	816 #else

722 0	817 0

723 #endif	818 #endif

724 },	819 },

725 {	820 {

726 #include "asciitab.h"	821 #include "asciitab.h"

727 #include "latin1tab.h"	822 #include "latin1tab.h"

728 },	823 },

729 STANDARD_VTABLE(little2_)	824 STANDARD_VTABLE(little2_) NULL_VTABLE

730 };	825 };

731	826

732 #endif	827 #endif

733	828

734 static const struct normal_encoding little2_encoding = {	829 static const struct normal_encoding little2_encoding = {

735 { VTABLE, 2, 0,	830 { VTABLE, 2, 0,

736 #if BYTEORDER == 1234	831 #if BYTEORDER == 1234

737 1	832 1

738 #else	833 #else

739 0	834 0

740 #endif	835 #endif

741 },	836 },

742 {	837 {

743 #define BT_COLON BT_NMSTRT	838 #define BT_COLON BT_NMSTRT

744 #include "asciitab.h"	839 #include "asciitab.h"

745 #undef BT_COLON	840 #undef BT_COLON

746 #include "latin1tab.h"	841 #include "latin1tab.h"

747 },	842 },

748 STANDARD_VTABLE(little2_)	843 STANDARD_VTABLE(little2_) NULL_VTABLE

749 };	844 };

750	845

751 #if BYTEORDER != 4321	846 #if BYTEORDER != 4321

752	847

753 #ifdef XML_NS	848 #ifdef XML_NS

754	849

755 static const struct normal_encoding internal_little2_encoding_ns = {	850 static const struct normal_encoding internal_little2_encoding_ns = {

756 { VTABLE, 2, 0, 1 },	851 { VTABLE, 2, 0, 1 },

757 {	852 {

758 #include "iasciitab.h"	853 #include "iasciitab.h"

759 #include "latin1tab.h"	854 #include "latin1tab.h"

760 },	855 },

761 STANDARD_VTABLE(little2_)	856 STANDARD_VTABLE(little2_) NULL_VTABLE

762 };	857 };

763	858

764 #endif	859 #endif

765	860

766 static const struct normal_encoding internal_little2_encoding = {	861 static const struct normal_encoding internal_little2_encoding = {

767 { VTABLE, 2, 0, 1 },	862 { VTABLE, 2, 0, 1 },

768 {	863 {

769 #define BT_COLON BT_NMSTRT	864 #define BT_COLON BT_NMSTRT

770 #include "iasciitab.h"	865 #include "iasciitab.h"

771 #undef BT_COLON	866 #undef BT_COLON

772 #include "latin1tab.h"	867 #include "latin1tab.h"

773 },	868 },

774 STANDARD_VTABLE(little2_)	869 STANDARD_VTABLE(little2_) NULL_VTABLE

775 };	870 };

776	871

777 #endif	872 #endif

778	873

779	874

780 #define BIG2_BYTE_TYPE(enc, p) \	875 #define BIG2_BYTE_TYPE(enc, p) \

781 ((p)[0] == 0 \	876 ((p)[0] == 0 \

782 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \	877 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \

783 : unicode_byte_type((p)[0], (p)[1]))	878 : unicode_byte_type((p)[0], (p)[1]))

784 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)	879 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
860 #if BYTEORDER == 4321	955 #if BYTEORDER == 4321

861 1	956 1

862 #else	957 #else

863 0	958 0

864 #endif	959 #endif

865 },	960 },

866 {	961 {

867 #include "asciitab.h"	962 #include "asciitab.h"

868 #include "latin1tab.h"	963 #include "latin1tab.h"

869 },	964 },

870 STANDARD_VTABLE(big2_)	965 STANDARD_VTABLE(big2_) NULL_VTABLE

871 };	966 };

872	967

873 #endif	968 #endif

874	969

875 static const struct normal_encoding big2_encoding = {	970 static const struct normal_encoding big2_encoding = {

876 { VTABLE, 2, 0,	971 { VTABLE, 2, 0,

877 #if BYTEORDER == 4321	972 #if BYTEORDER == 4321

878 1	973 1

879 #else	974 #else

880 0	975 0

881 #endif	976 #endif

882 },	977 },

883 {	978 {

884 #define BT_COLON BT_NMSTRT	979 #define BT_COLON BT_NMSTRT

885 #include "asciitab.h"	980 #include "asciitab.h"

886 #undef BT_COLON	981 #undef BT_COLON

887 #include "latin1tab.h"	982 #include "latin1tab.h"

888 },	983 },

889 STANDARD_VTABLE(big2_)	984 STANDARD_VTABLE(big2_) NULL_VTABLE

890 };	985 };

891	986

892 #if BYTEORDER != 1234	987 #if BYTEORDER != 1234

893	988

894 #ifdef XML_NS	989 #ifdef XML_NS

895	990

896 static const struct normal_encoding internal_big2_encoding_ns = {	991 static const struct normal_encoding internal_big2_encoding_ns = {

897 { VTABLE, 2, 0, 1 },	992 { VTABLE, 2, 0, 1 },

898 {	993 {

899 #include "iasciitab.h"	994 #include "iasciitab.h"

900 #include "latin1tab.h"	995 #include "latin1tab.h"

901 },	996 },

902 STANDARD_VTABLE(big2_)	997 STANDARD_VTABLE(big2_) NULL_VTABLE

903 };	998 };

904	999

905 #endif	1000 #endif

906	1001

907 static const struct normal_encoding internal_big2_encoding = {	1002 static const struct normal_encoding internal_big2_encoding = {

908 { VTABLE, 2, 0, 1 },	1003 { VTABLE, 2, 0, 1 },

909 {	1004 {

910 #define BT_COLON BT_NMSTRT	1005 #define BT_COLON BT_NMSTRT

911 #include "iasciitab.h"	1006 #include "iasciitab.h"

912 #undef BT_COLON	1007 #undef BT_COLON

913 #include "latin1tab.h"	1008 #include "latin1tab.h"

914 },	1009 },

915 STANDARD_VTABLE(big2_)	1010 STANDARD_VTABLE(big2_) NULL_VTABLE

916 };	1011 };

917	1012

918 #endif	1013 #endif

919	1014

920 #undef PREFIX	1015 #undef PREFIX

921	1016

922 static int FASTCALL	1017 static int FASTCALL

923 streqci(const char *s1, const char *s2)	1018 streqci(const char *s1, const char *s2)

924 {	1019 {

925 for (;;) {	1020 for (;;) {

926 char c1 = *s1++;	1021 char c1 = *s1++;

927 char c2 = *s2++;	1022 char c2 = *s2++;

928 if (ASCII_a <= c1 && c1 <= ASCII_z)	1023 if (ASCII_a <= c1 && c1 <= ASCII_z)

929 c1 += ASCII_A - ASCII_a;	1024 c1 += ASCII_A - ASCII_a;

930 if (ASCII_a <= c2 && c2 <= ASCII_z)	1025 if (ASCII_a <= c2 && c2 <= ASCII_z)

931 c2 += ASCII_A - ASCII_a;	1026 c2 += ASCII_A - ASCII_a;

932 if (c1 != c2)	1027 if (c1 != c2)

933 return 0;	1028 return 0;

934 if (!c1)	1029 if (!c1)

935 break;	1030 break;

936 }	1031 }

937 return 1;	1032 return 1;

938 }	1033 }

939	1034

940 static void PTRCALL	1035 static void PTRCALL

941 initUpdatePosition(const ENCODING *enc, const char *ptr,	1036 initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,

942 const char *end, POSITION *pos)	1037 const char *end, POSITION *pos)

943 {	1038 {

944 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);	1039 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);

945 }	1040 }

946	1041

947 static int	1042 static int

948 toAscii(const ENCODING *enc, const char *ptr, const char *end)	1043 toAscii(const ENCODING *enc, const char *ptr, const char *end)

949 {	1044 {

950 char buf[1];	1045 char buf[1];

951 char *p = buf;	1046 char *p = buf;

(...skipping 329 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1281 }	1376 }

1282	1377

1283 static int PTRFASTCALL	1378 static int PTRFASTCALL

1284 unknown_isInvalid(const ENCODING *enc, const char *p)	1379 unknown_isInvalid(const ENCODING *enc, const char *p)

1285 {	1380 {

1286 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);	1381 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);

1287 int c = uenc->convert(uenc->userData, p);	1382 int c = uenc->convert(uenc->userData, p);

1288 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;	1383 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;

1289 }	1384 }

1290	1385

1291 static void PTRCALL	1386 static enum XML_Convert_Result PTRCALL

1292 unknown_toUtf8(const ENCODING *enc,	1387 unknown_toUtf8(const ENCODING *enc,

1293 const char **fromP, const char *fromLim,	1388 const char **fromP, const char *fromLim,

1294 char **toP, const char *toLim)	1389 char **toP, const char *toLim)

1295 {	1390 {

1296 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);	1391 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);

1297 char buf[XML_UTF8_ENCODE_MAX];	1392 char buf[XML_UTF8_ENCODE_MAX];

1298 for (;;) {	1393 for (;;) {

1299 const char *utf8;	1394 const char *utf8;

1300 int n;	1395 int n;

1301 if (*fromP == fromLim)	1396 if (*fromP == fromLim)

1302 break;	1397 return XML_CONVERT_COMPLETED;

1303 utf8 = uenc->utf8[(unsigned char)**fromP];	1398 utf8 = uenc->utf8[(unsigned char)**fromP];

1304 n = *utf8++;	1399 n = *utf8++;

1305 if (n == 0) {	1400 if (n == 0) {

1306 int c = uenc->convert(uenc->userData, *fromP);	1401 int c = uenc->convert(uenc->userData, *fromP);

1307 n = XmlUtf8Encode(c, buf);	1402 n = XmlUtf8Encode(c, buf);

1308 if (n > toLim - *toP)	1403 if (n > toLim - *toP)

1309 break;	1404 return XML_CONVERT_OUTPUT_EXHAUSTED;

1310 utf8 = buf;	1405 utf8 = buf;

1311 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]	1406 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]

1312 - (BT_LEAD2 - 2));	1407 - (BT_LEAD2 - 2));

1313 }	1408 }

1314 else {	1409 else {

1315 if (n > toLim - *toP)	1410 if (n > toLim - *toP)

1316 break;	1411 return XML_CONVERT_OUTPUT_EXHAUSTED;

1317 (*fromP)++;	1412 (*fromP)++;

1318 }	1413 }

1319 do {	1414 do {

1320 *(*toP)++ = *utf8++;	1415 *(*toP)++ = *utf8++;

1321 } while (--n != 0);	1416 } while (--n != 0);

1322 }	1417 }

1323 }	1418 }

1324	1419

1325 static void PTRCALL	1420 static enum XML_Convert_Result PTRCALL

1326 unknown_toUtf16(const ENCODING *enc,	1421 unknown_toUtf16(const ENCODING *enc,

1327 const char **fromP, const char *fromLim,	1422 const char **fromP, const char *fromLim,

1328 unsigned short **toP, const unsigned short *toLim)	1423 unsigned short **toP, const unsigned short *toLim)

1329 {	1424 {

1330 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);	1425 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);

1331 while (*fromP != fromLim && *toP != toLim) {	1426 while (*fromP < fromLim && *toP < toLim) {

1332 unsigned short c = uenc->utf16[(unsigned char)**fromP];	1427 unsigned short c = uenc->utf16[(unsigned char)**fromP];

1333 if (c == 0) {	1428 if (c == 0) {

1334 c = (unsigned short)	1429 c = (unsigned short)

1335 uenc->convert(uenc->userData, *fromP);	1430 uenc->convert(uenc->userData, *fromP);

1336 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]	1431 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]

1337 - (BT_LEAD2 - 2));	1432 - (BT_LEAD2 - 2));

1338 }	1433 }

1339 else	1434 else

1340 (*fromP)++;	1435 (*fromP)++;

1341 *(*toP)++ = c;	1436 *(*toP)++ = c;

1342 }	1437 }

	1438

	1439 if ((*toP == toLim) && (*fromP < fromLim))

	1440 return XML_CONVERT_OUTPUT_EXHAUSTED;

	1441 else

	1442 return XML_CONVERT_COMPLETED;

1343 }	1443 }

1344	1444

1345 ENCODING *	1445 ENCODING *

1346 XmlInitUnknownEncoding(void *mem,	1446 XmlInitUnknownEncoding(void *mem,

1347 int *table,	1447 int *table,

1348 CONVERTER convert,	1448 CONVERTER convert,

1349 void *userData)	1449 void *userData)

1350 {	1450 {

1351 int i;	1451 int i;

1352 struct unknown_encoding *e = (struct unknown_encoding *)mem;	1452 struct unknown_encoding *e = (struct unknown_encoding *)mem;

(...skipping 143 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1496 static int	1596 static int

1497 initScan(const ENCODING * const *encodingTable,	1597 initScan(const ENCODING * const *encodingTable,

1498 const INIT_ENCODING *enc,	1598 const INIT_ENCODING *enc,

1499 int state,	1599 int state,

1500 const char *ptr,	1600 const char *ptr,

1501 const char *end,	1601 const char *end,

1502 const char **nextTokPtr)	1602 const char **nextTokPtr)

1503 {	1603 {

1504 const ENCODING **encPtr;	1604 const ENCODING **encPtr;

1505	1605

1506 if (ptr == end)	1606 if (ptr >= end)

1507 return XML_TOK_NONE;	1607 return XML_TOK_NONE;

1508 encPtr = enc->encPtr;	1608 encPtr = enc->encPtr;

1509 if (ptr + 1 == end) {	1609 if (ptr + 1 == end) {

1510 /* only a single byte available for auto-detection */	1610 /* only a single byte available for auto-detection */

1511 #ifndef XML_DTD /* FIXME */	1611 #ifndef XML_DTD /* FIXME */

1512 /* a well-formed document entity must have more than one byte */	1612 /* a well-formed document entity must have more than one byte */

1513 if (state != XML_CONTENT_STATE)	1613 if (state != XML_CONTENT_STATE)

1514 return XML_TOK_PARTIAL;	1614 return XML_TOK_PARTIAL;

1515 #endif	1615 #endif

1516 /* so we're parsing an external text entity... */	1616 /* so we're parsing an external text entity... */

(...skipping 125 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1642 CONVERTER convert,	1742 CONVERTER convert,

1643 void *userData)	1743 void *userData)

1644 {	1744 {

1645 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);	1745 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);

1646 if (enc)	1746 if (enc)

1647 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;	1747 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;

1648 return enc;	1748 return enc;

1649 }	1749 }

1650	1750

1651 #endif /* XML_NS */	1751 #endif /* XML_NS */

OLD	NEW

« no previous file with comments | « third_party/expat/files/lib/xmltok.c ('k') | third_party/expat/files/lib/xmltok_impl.c » ('j') | no next file with comments »