Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(85)

Side by Side Diff: third_party/expat/files/lib/xmltok.c

Issue 2761253002: Update expat to 2.2.0 to fix CVE vulnerability. (Closed)
Patch Set: update README.chromium Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/expat/files/lib/xmltok.h ('k') | third_party/expat/files/lib/xmltok.c.origin » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission. 2 See the file COPYING for copying permission.
3 */ 3 */
4 4
5 #include <stddef.h> 5 #include <stddef.h>
6 6
7 #ifdef COMPILED_FROM_DSP 7 #ifdef WIN32
8 #include "winconfig.h" 8 #include "winconfig.h"
9 #elif defined(MACOS_CLASSIC) 9 #elif defined(MACOS_CLASSIC)
10 #include "macconfig.h" 10 #include "macconfig.h"
11 #elif defined(__amigaos__) 11 #elif defined(__amigaos__)
12 #include "amigaconfig.h" 12 #include "amigaconfig.h"
13 #elif defined(__WATCOMC__) 13 #elif defined(__WATCOMC__)
14 #include "watcomconfig.h" 14 #include "watcomconfig.h"
15 #else 15 #else
16 #ifdef HAVE_EXPAT_CONFIG_H 16 #ifdef HAVE_EXPAT_CONFIG_H
17 #include <expat_config.h> 17 #include <expat_config.h>
18 #endif 18 #endif
19 #endif /* ndef COMPILED_FROM_DSP */ 19 #endif /* ndef WIN32 */
20 20
21 #include "expat_external.h" 21 #include "expat_external.h"
22 #include "internal.h" 22 #include "internal.h"
23 #include "xmltok.h" 23 #include "xmltok.h"
24 #include "nametab.h" 24 #include "nametab.h"
25 25
26 #ifdef XML_DTD 26 #ifdef XML_DTD
27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) 27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
28 #else 28 #else
29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */ 29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */
30 #endif 30 #endif
31 31
32 #define VTABLE1 \ 32 #define VTABLE1 \
33 { PREFIX(prologTok), PREFIX(contentTok), \ 33 { PREFIX(prologTok), PREFIX(contentTok), \
34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ 34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ 35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
36 PREFIX(sameName), \ 36 PREFIX(sameName), \
37 PREFIX(nameMatchesAscii), \ 37 PREFIX(nameMatchesAscii), \
38 PREFIX(nameLength), \ 38 PREFIX(nameLength), \
39 PREFIX(skipS), \ 39 PREFIX(skipS), \
40 PREFIX(getAtts), \ 40 PREFIX(getAtts), \
41 PREFIX(charRefNumber), \ 41 PREFIX(charRefNumber), \
42 PREFIX(predefinedEntityName), \ 42 PREFIX(predefinedEntityName), \
43 PREFIX(updatePosition), \ 43 PREFIX(updatePosition), \
44 PREFIX(isPublicId) 44 PREFIX(isPublicId)
45 45
46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) 46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
47 47
48 #define UCS2_GET_NAMING(pages, hi, lo) \ 48 #define UCS2_GET_NAMING(pages, hi, lo) \
49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) 49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
50 50
51 /* A 2 byte UTF-8 representation splits the characters 11 bits between 51 /* A 2 byte UTF-8 representation splits the characters 11 bits between
52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into 52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
53 pages, 3 bits to add to that index and 5 bits to generate the mask. 53 pages, 3 bits to add to that index and 5 bits to generate the mask.
54 */ 54 */
55 #define UTF8_GET_NAMING2(pages, byte) \ 55 #define UTF8_GET_NAMING2(pages, byte) \
56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ 56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
57 + ((((byte)[0]) & 3) << 1) \ 57 + ((((byte)[0]) & 3) << 1) \
58 + ((((byte)[1]) >> 5) & 1)] \ 58 + ((((byte)[1]) >> 5) & 1)] \
59 & (1 << (((byte)[1]) & 0x1F))) 59 & (1u << (((byte)[1]) & 0x1F)))
60 60
61 /* A 3 byte UTF-8 representation splits the characters 16 bits between 61 /* A 3 byte UTF-8 representation splits the characters 16 bits between
62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index 62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
63 into pages, 3 bits to add to that index and 5 bits to generate the 63 into pages, 3 bits to add to that index and 5 bits to generate the
64 mask. 64 mask.
65 */ 65 */
66 #define UTF8_GET_NAMING3(pages, byte) \ 66 #define UTF8_GET_NAMING3(pages, byte) \
67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ 67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
68 + ((((byte)[1]) >> 2) & 0xF)] \ 68 + ((((byte)[1]) >> 2) & 0xF)] \
69 << 3) \ 69 << 3) \
70 + ((((byte)[1]) & 3) << 1) \ 70 + ((((byte)[1]) & 3) << 1) \
71 + ((((byte)[2]) >> 5) & 1)] \ 71 + ((((byte)[2]) >> 5) & 1)] \
72 & (1 << (((byte)[2]) & 0x1F))) 72 & (1u << (((byte)[2]) & 0x1F)))
73 73
74 #define UTF8_GET_NAMING(pages, p, n) \ 74 #define UTF8_GET_NAMING(pages, p, n) \
75 ((n) == 2 \ 75 ((n) == 2 \
76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ 76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
77 : ((n) == 3 \ 77 : ((n) == 3 \
78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ 78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
79 : 0)) 79 : 0))
80 80
81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B 81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ 82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 || \ 115 || \
116 ((*p) == 0xF0 \ 116 ((*p) == 0xF0 \
117 ? \ 117 ? \
118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ 118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
119 : \ 119 : \
120 ((p)[1] & 0x80) == 0 \ 120 ((p)[1] & 0x80) == 0 \
121 || \ 121 || \
122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) 122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
123 123
124 static int PTRFASTCALL 124 static int PTRFASTCALL
125 isNever(const ENCODING *enc, const char *p) 125 isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
126 { 126 {
127 return 0; 127 return 0;
128 } 128 }
129 129
130 static int PTRFASTCALL 130 static int PTRFASTCALL
131 utf8_isName2(const ENCODING *enc, const char *p) 131 utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
132 { 132 {
133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); 133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
134 } 134 }
135 135
136 static int PTRFASTCALL 136 static int PTRFASTCALL
137 utf8_isName3(const ENCODING *enc, const char *p) 137 utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
138 { 138 {
139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); 139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
140 } 140 }
141 141
142 #define utf8_isName4 isNever 142 #define utf8_isName4 isNever
143 143
144 static int PTRFASTCALL 144 static int PTRFASTCALL
145 utf8_isNmstrt2(const ENCODING *enc, const char *p) 145 utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
146 { 146 {
147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); 147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
148 } 148 }
149 149
150 static int PTRFASTCALL 150 static int PTRFASTCALL
151 utf8_isNmstrt3(const ENCODING *enc, const char *p) 151 utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
152 { 152 {
153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); 153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
154 } 154 }
155 155
156 #define utf8_isNmstrt4 isNever 156 #define utf8_isNmstrt4 isNever
157 157
158 static int PTRFASTCALL 158 static int PTRFASTCALL
159 utf8_isInvalid2(const ENCODING *enc, const char *p) 159 utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
160 { 160 {
161 return UTF8_INVALID2((const unsigned char *)p); 161 return UTF8_INVALID2((const unsigned char *)p);
162 } 162 }
163 163
164 static int PTRFASTCALL 164 static int PTRFASTCALL
165 utf8_isInvalid3(const ENCODING *enc, const char *p) 165 utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
166 { 166 {
167 return UTF8_INVALID3((const unsigned char *)p); 167 return UTF8_INVALID3((const unsigned char *)p);
168 } 168 }
169 169
170 static int PTRFASTCALL 170 static int PTRFASTCALL
171 utf8_isInvalid4(const ENCODING *enc, const char *p) 171 utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
172 { 172 {
173 return UTF8_INVALID4((const unsigned char *)p); 173 return UTF8_INVALID4((const unsigned char *)p);
174 } 174 }
175 175
176 struct normal_encoding { 176 struct normal_encoding {
177 ENCODING enc; 177 ENCODING enc;
178 unsigned char type[256]; 178 unsigned char type[256];
179 #ifdef XML_MIN_SIZE 179 #ifdef XML_MIN_SIZE
180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *); 180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); 181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
215 E ## isName2, \ 215 E ## isName2, \
216 E ## isName3, \ 216 E ## isName3, \
217 E ## isName4, \ 217 E ## isName4, \
218 E ## isNmstrt2, \ 218 E ## isNmstrt2, \
219 E ## isNmstrt3, \ 219 E ## isNmstrt3, \
220 E ## isNmstrt4, \ 220 E ## isNmstrt4, \
221 E ## isInvalid2, \ 221 E ## isInvalid2, \
222 E ## isInvalid3, \ 222 E ## isInvalid3, \
223 E ## isInvalid4 223 E ## isInvalid4
224 224
225 #define NULL_VTABLE \
226 /* isName2 */ NULL, \
227 /* isName3 */ NULL, \
228 /* isName4 */ NULL, \
229 /* isNmstrt2 */ NULL, \
230 /* isNmstrt3 */ NULL, \
231 /* isNmstrt4 */ NULL, \
232 /* isInvalid2 */ NULL, \
233 /* isInvalid3 */ NULL, \
234 /* isInvalid4 */ NULL
235
225 static int FASTCALL checkCharRefNumber(int); 236 static int FASTCALL checkCharRefNumber(int);
226 237
227 #include "xmltok_impl.h" 238 #include "xmltok_impl.h"
228 #include "ascii.h" 239 #include "ascii.h"
229 240
230 #ifdef XML_MIN_SIZE 241 #ifdef XML_MIN_SIZE
231 #define sb_isNameMin isNever 242 #define sb_isNameMin isNever
232 #define sb_isNmstrtMin isNever 243 #define sb_isNmstrtMin isNever
233 #endif 244 #endif
234 245
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
311 #undef IS_NMSTRT_CHAR_MINBPC 322 #undef IS_NMSTRT_CHAR_MINBPC
312 #undef IS_INVALID_CHAR 323 #undef IS_INVALID_CHAR
313 324
314 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ 325 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
315 UTF8_cval1 = 0x00, 326 UTF8_cval1 = 0x00,
316 UTF8_cval2 = 0xc0, 327 UTF8_cval2 = 0xc0,
317 UTF8_cval3 = 0xe0, 328 UTF8_cval3 = 0xe0,
318 UTF8_cval4 = 0xf0 329 UTF8_cval4 = 0xf0
319 }; 330 };
320 331
321 static void PTRCALL 332 void
322 utf8_toUtf8(const ENCODING *enc, 333 align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
334 {
335 const char * fromLim = *fromLimRef;
336 size_t walked = 0;
337 for (; fromLim > from; fromLim--, walked++) {
338 const unsigned char prev = (unsigned char)fromLim[-1];
339 if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
340 if (walked + 1 >= 4) {
341 fromLim += 4 - 1;
342 break;
343 } else {
344 walked = 0;
345 }
346 } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxx x byte */
347 if (walked + 1 >= 3) {
348 fromLim += 3 - 1;
349 break;
350 } else {
351 walked = 0;
352 }
353 } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxx x byte */
354 if (walked + 1 >= 2) {
355 fromLim += 2 - 1;
356 break;
357 } else {
358 walked = 0;
359 }
360 } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxx xx */
361 break;
362 }
363 }
364 *fromLimRef = fromLim;
365 }
366
367 static enum XML_Convert_Result PTRCALL
368 utf8_toUtf8(const ENCODING *UNUSED_P(enc),
323 const char **fromP, const char *fromLim, 369 const char **fromP, const char *fromLim,
324 char **toP, const char *toLim) 370 char **toP, const char *toLim)
325 { 371 {
372 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
326 char *to; 373 char *to;
327 const char *from; 374 const char *from;
328 if (fromLim - *fromP > toLim - *toP) { 375 if (fromLim - *fromP > toLim - *toP) {
329 /* Avoid copying partial characters. */ 376 /* Avoid copying partial characters. */
330 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) 377 res = XML_CONVERT_OUTPUT_EXHAUSTED;
331 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) 378 fromLim = *fromP + (toLim - *toP);
332 break; 379 align_limit_to_full_utf8_characters(*fromP, &fromLim);
333 } 380 }
334 for (to = *toP, from = *fromP; from != fromLim; from++, to++) 381 for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
335 *to = *from; 382 *to = *from;
336 *fromP = from; 383 *fromP = from;
337 *toP = to; 384 *toP = to;
385
386 if ((to == toLim) && (from < fromLim))
387 return XML_CONVERT_OUTPUT_EXHAUSTED;
388 else
389 return res;
338 } 390 }
339 391
340 static void PTRCALL 392 static enum XML_Convert_Result PTRCALL
341 utf8_toUtf16(const ENCODING *enc, 393 utf8_toUtf16(const ENCODING *enc,
342 const char **fromP, const char *fromLim, 394 const char **fromP, const char *fromLim,
343 unsigned short **toP, const unsigned short *toLim) 395 unsigned short **toP, const unsigned short *toLim)
344 { 396 {
397 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
345 unsigned short *to = *toP; 398 unsigned short *to = *toP;
346 const char *from = *fromP; 399 const char *from = *fromP;
347 while (from != fromLim && to != toLim) { 400 while (from < fromLim && to < toLim) {
348 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { 401 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
349 case BT_LEAD2: 402 case BT_LEAD2:
403 if (fromLim - from < 2) {
404 res = XML_CONVERT_INPUT_INCOMPLETE;
405 goto after;
406 }
350 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); 407 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
351 from += 2; 408 from += 2;
352 break; 409 break;
353 case BT_LEAD3: 410 case BT_LEAD3:
411 if (fromLim - from < 3) {
412 res = XML_CONVERT_INPUT_INCOMPLETE;
413 goto after;
414 }
354 *to++ = (unsigned short)(((from[0] & 0xf) << 12) 415 *to++ = (unsigned short)(((from[0] & 0xf) << 12)
355 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); 416 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
356 from += 3; 417 from += 3;
357 break; 418 break;
358 case BT_LEAD4: 419 case BT_LEAD4:
359 { 420 {
360 unsigned long n; 421 unsigned long n;
361 if (to + 1 == toLim) 422 if (toLim - to < 2) {
423 res = XML_CONVERT_OUTPUT_EXHAUSTED;
362 goto after; 424 goto after;
425 }
426 if (fromLim - from < 4) {
427 res = XML_CONVERT_INPUT_INCOMPLETE;
428 goto after;
429 }
363 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) 430 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
364 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); 431 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
365 n -= 0x10000; 432 n -= 0x10000;
366 to[0] = (unsigned short)((n >> 10) | 0xD800); 433 to[0] = (unsigned short)((n >> 10) | 0xD800);
367 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); 434 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
368 to += 2; 435 to += 2;
369 from += 4; 436 from += 4;
370 } 437 }
371 break; 438 break;
372 default: 439 default:
373 *to++ = *from++; 440 *to++ = *from++;
374 break; 441 break;
375 } 442 }
376 } 443 }
444 if (from < fromLim)
445 res = XML_CONVERT_OUTPUT_EXHAUSTED;
377 after: 446 after:
378 *fromP = from; 447 *fromP = from;
379 *toP = to; 448 *toP = to;
449 return res;
380 } 450 }
381 451
382 #ifdef XML_NS 452 #ifdef XML_NS
383 static const struct normal_encoding utf8_encoding_ns = { 453 static const struct normal_encoding utf8_encoding_ns = {
384 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 454 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
385 { 455 {
386 #include "asciitab.h" 456 #include "asciitab.h"
387 #include "utf8tab.h" 457 #include "utf8tab.h"
388 }, 458 },
389 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 459 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
(...skipping 28 matching lines...) Expand all
418 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 488 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
419 { 489 {
420 #define BT_COLON BT_NMSTRT 490 #define BT_COLON BT_NMSTRT
421 #include "iasciitab.h" 491 #include "iasciitab.h"
422 #undef BT_COLON 492 #undef BT_COLON
423 #include "utf8tab.h" 493 #include "utf8tab.h"
424 }, 494 },
425 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 495 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
426 }; 496 };
427 497
428 static void PTRCALL 498 static enum XML_Convert_Result PTRCALL
429 latin1_toUtf8(const ENCODING *enc, 499 latin1_toUtf8(const ENCODING *UNUSED_P(enc),
430 const char **fromP, const char *fromLim, 500 const char **fromP, const char *fromLim,
431 char **toP, const char *toLim) 501 char **toP, const char *toLim)
432 { 502 {
433 for (;;) { 503 for (;;) {
434 unsigned char c; 504 unsigned char c;
435 if (*fromP == fromLim) 505 if (*fromP == fromLim)
436 break; 506 return XML_CONVERT_COMPLETED;
437 c = (unsigned char)**fromP; 507 c = (unsigned char)**fromP;
438 if (c & 0x80) { 508 if (c & 0x80) {
439 if (toLim - *toP < 2) 509 if (toLim - *toP < 2)
440 break; 510 return XML_CONVERT_OUTPUT_EXHAUSTED;
441 *(*toP)++ = (char)((c >> 6) | UTF8_cval2); 511 *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
442 *(*toP)++ = (char)((c & 0x3f) | 0x80); 512 *(*toP)++ = (char)((c & 0x3f) | 0x80);
443 (*fromP)++; 513 (*fromP)++;
444 } 514 }
445 else { 515 else {
446 if (*toP == toLim) 516 if (*toP == toLim)
447 break; 517 return XML_CONVERT_OUTPUT_EXHAUSTED;
448 *(*toP)++ = *(*fromP)++; 518 *(*toP)++ = *(*fromP)++;
449 } 519 }
450 } 520 }
451 } 521 }
452 522
453 static void PTRCALL 523 static enum XML_Convert_Result PTRCALL
454 latin1_toUtf16(const ENCODING *enc, 524 latin1_toUtf16(const ENCODING *UNUSED_P(enc),
455 const char **fromP, const char *fromLim, 525 const char **fromP, const char *fromLim,
456 unsigned short **toP, const unsigned short *toLim) 526 unsigned short **toP, const unsigned short *toLim)
457 { 527 {
458 while (*fromP != fromLim && *toP != toLim) 528 while (*fromP < fromLim && *toP < toLim)
459 *(*toP)++ = (unsigned char)*(*fromP)++; 529 *(*toP)++ = (unsigned char)*(*fromP)++;
530
531 if ((*toP == toLim) && (*fromP < fromLim))
532 return XML_CONVERT_OUTPUT_EXHAUSTED;
533 else
534 return XML_CONVERT_COMPLETED;
460 } 535 }
461 536
462 #ifdef XML_NS 537 #ifdef XML_NS
463 538
464 static const struct normal_encoding latin1_encoding_ns = { 539 static const struct normal_encoding latin1_encoding_ns = {
465 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 540 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
466 { 541 {
467 #include "asciitab.h" 542 #include "asciitab.h"
468 #include "latin1tab.h" 543 #include "latin1tab.h"
469 }, 544 },
470 STANDARD_VTABLE(sb_) 545 STANDARD_VTABLE(sb_) NULL_VTABLE
471 }; 546 };
472 547
473 #endif 548 #endif
474 549
475 static const struct normal_encoding latin1_encoding = { 550 static const struct normal_encoding latin1_encoding = {
476 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 551 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
477 { 552 {
478 #define BT_COLON BT_NMSTRT 553 #define BT_COLON BT_NMSTRT
479 #include "asciitab.h" 554 #include "asciitab.h"
480 #undef BT_COLON 555 #undef BT_COLON
481 #include "latin1tab.h" 556 #include "latin1tab.h"
482 }, 557 },
483 STANDARD_VTABLE(sb_) 558 STANDARD_VTABLE(sb_) NULL_VTABLE
484 }; 559 };
485 560
486 static void PTRCALL 561 static enum XML_Convert_Result PTRCALL
487 ascii_toUtf8(const ENCODING *enc, 562 ascii_toUtf8(const ENCODING *UNUSED_P(enc),
488 const char **fromP, const char *fromLim, 563 const char **fromP, const char *fromLim,
489 char **toP, const char *toLim) 564 char **toP, const char *toLim)
490 { 565 {
491 while (*fromP != fromLim && *toP != toLim) 566 while (*fromP < fromLim && *toP < toLim)
492 *(*toP)++ = *(*fromP)++; 567 *(*toP)++ = *(*fromP)++;
568
569 if ((*toP == toLim) && (*fromP < fromLim))
570 return XML_CONVERT_OUTPUT_EXHAUSTED;
571 else
572 return XML_CONVERT_COMPLETED;
493 } 573 }
494 574
495 #ifdef XML_NS 575 #ifdef XML_NS
496 576
497 static const struct normal_encoding ascii_encoding_ns = { 577 static const struct normal_encoding ascii_encoding_ns = {
498 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 578 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
499 { 579 {
500 #include "asciitab.h" 580 #include "asciitab.h"
501 /* BT_NONXML == 0 */ 581 /* BT_NONXML == 0 */
502 }, 582 },
503 STANDARD_VTABLE(sb_) 583 STANDARD_VTABLE(sb_) NULL_VTABLE
504 }; 584 };
505 585
506 #endif 586 #endif
507 587
508 static const struct normal_encoding ascii_encoding = { 588 static const struct normal_encoding ascii_encoding = {
509 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 589 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
510 { 590 {
511 #define BT_COLON BT_NMSTRT 591 #define BT_COLON BT_NMSTRT
512 #include "asciitab.h" 592 #include "asciitab.h"
513 #undef BT_COLON 593 #undef BT_COLON
514 /* BT_NONXML == 0 */ 594 /* BT_NONXML == 0 */
515 }, 595 },
516 STANDARD_VTABLE(sb_) 596 STANDARD_VTABLE(sb_) NULL_VTABLE
517 }; 597 };
518 598
519 static int PTRFASTCALL 599 static int PTRFASTCALL
520 unicode_byte_type(char hi, char lo) 600 unicode_byte_type(char hi, char lo)
521 { 601 {
522 switch ((unsigned char)hi) { 602 switch ((unsigned char)hi) {
523 case 0xD8: case 0xD9: case 0xDA: case 0xDB: 603 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
524 return BT_LEAD4; 604 return BT_LEAD4;
525 case 0xDC: case 0xDD: case 0xDE: case 0xDF: 605 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
526 return BT_TRAIL; 606 return BT_TRAIL;
527 case 0xFF: 607 case 0xFF:
528 switch ((unsigned char)lo) { 608 switch ((unsigned char)lo) {
529 case 0xFF: 609 case 0xFF:
530 case 0xFE: 610 case 0xFE:
531 return BT_NONXML; 611 return BT_NONXML;
532 } 612 }
533 break; 613 break;
534 } 614 }
535 return BT_NONASCII; 615 return BT_NONASCII;
536 } 616 }
537 617
538 #define DEFINE_UTF16_TO_UTF8(E) \ 618 #define DEFINE_UTF16_TO_UTF8(E) \
539 static void PTRCALL \ 619 static enum XML_Convert_Result PTRCALL \
540 E ## toUtf8(const ENCODING *enc, \ 620 E ## toUtf8(const ENCODING *UNUSED_P(enc), \
541 const char **fromP, const char *fromLim, \ 621 const char **fromP, const char *fromLim, \
542 char **toP, const char *toLim) \ 622 char **toP, const char *toLim) \
543 { \ 623 { \
544 const char *from; \ 624 const char *from = *fromP; \
545 for (from = *fromP; from != fromLim; from += 2) { \ 625 fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
626 for (; from < fromLim; from += 2) { \
546 int plane; \ 627 int plane; \
547 unsigned char lo2; \ 628 unsigned char lo2; \
548 unsigned char lo = GET_LO(from); \ 629 unsigned char lo = GET_LO(from); \
549 unsigned char hi = GET_HI(from); \ 630 unsigned char hi = GET_HI(from); \
550 switch (hi) { \ 631 switch (hi) { \
551 case 0: \ 632 case 0: \
552 if (lo < 0x80) { \ 633 if (lo < 0x80) { \
553 if (*toP == toLim) { \ 634 if (*toP == toLim) { \
554 *fromP = from; \ 635 *fromP = from; \
555 return; \ 636 return XML_CONVERT_OUTPUT_EXHAUSTED; \
556 } \ 637 } \
557 *(*toP)++ = lo; \ 638 *(*toP)++ = lo; \
558 break; \ 639 break; \
559 } \ 640 } \
560 /* fall through */ \ 641 /* fall through */ \
561 case 0x1: case 0x2: case 0x3: \ 642 case 0x1: case 0x2: case 0x3: \
562 case 0x4: case 0x5: case 0x6: case 0x7: \ 643 case 0x4: case 0x5: case 0x6: case 0x7: \
563 if (toLim - *toP < 2) { \ 644 if (toLim - *toP < 2) { \
564 *fromP = from; \ 645 *fromP = from; \
565 return; \ 646 return XML_CONVERT_OUTPUT_EXHAUSTED; \
566 } \ 647 } \
567 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ 648 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
568 *(*toP)++ = ((lo & 0x3f) | 0x80); \ 649 *(*toP)++ = ((lo & 0x3f) | 0x80); \
569 break; \ 650 break; \
570 default: \ 651 default: \
571 if (toLim - *toP < 3) { \ 652 if (toLim - *toP < 3) { \
572 *fromP = from; \ 653 *fromP = from; \
573 return; \ 654 return XML_CONVERT_OUTPUT_EXHAUSTED; \
574 } \ 655 } \
575 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ 656 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
576 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ 657 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
577 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ 658 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
578 *(*toP)++ = ((lo & 0x3f) | 0x80); \ 659 *(*toP)++ = ((lo & 0x3f) | 0x80); \
579 break; \ 660 break; \
580 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ 661 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
581 if (toLim - *toP < 4) { \ 662 if (toLim - *toP < 4) { \
582 *fromP = from; \ 663 *fromP = from; \
583 return; \ 664 return XML_CONVERT_OUTPUT_EXHAUSTED; \
665 } \
666 if (fromLim - from < 4) { \
667 *fromP = from; \
668 return XML_CONVERT_INPUT_INCOMPLETE; \
584 } \ 669 } \
585 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ 670 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
586 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ 671 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
587 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ 672 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
588 from += 2; \ 673 from += 2; \
589 lo2 = GET_LO(from); \ 674 lo2 = GET_LO(from); \
590 *(*toP)++ = (((lo & 0x3) << 4) \ 675 *(*toP)++ = (((lo & 0x3) << 4) \
591 | ((GET_HI(from) & 0x3) << 2) \ 676 | ((GET_HI(from) & 0x3) << 2) \
592 | (lo2 >> 6) \ 677 | (lo2 >> 6) \
593 | 0x80); \ 678 | 0x80); \
594 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ 679 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
595 break; \ 680 break; \
596 } \ 681 } \
597 } \ 682 } \
598 *fromP = from; \ 683 *fromP = from; \
684 if (from < fromLim) \
685 return XML_CONVERT_INPUT_INCOMPLETE; \
686 else \
687 return XML_CONVERT_COMPLETED; \
599 } 688 }
600 689
601 #define DEFINE_UTF16_TO_UTF16(E) \ 690 #define DEFINE_UTF16_TO_UTF16(E) \
602 static void PTRCALL \ 691 static enum XML_Convert_Result PTRCALL \
603 E ## toUtf16(const ENCODING *enc, \ 692 E ## toUtf16(const ENCODING *UNUSED_P(enc), \
604 const char **fromP, const char *fromLim, \ 693 const char **fromP, const char *fromLim, \
605 unsigned short **toP, const unsigned short *toLim) \ 694 unsigned short **toP, const unsigned short *toLim) \
606 { \ 695 { \
696 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
697 fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
607 /* Avoid copying first half only of surrogate */ \ 698 /* Avoid copying first half only of surrogate */ \
608 if (fromLim - *fromP > ((toLim - *toP) << 1) \ 699 if (fromLim - *fromP > ((toLim - *toP) << 1) \
609 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ 700 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
610 fromLim -= 2; \ 701 fromLim -= 2; \
611 for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ 702 res = XML_CONVERT_INPUT_INCOMPLETE; \
703 } \
704 for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
612 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ 705 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
706 if ((*toP == toLim) && (*fromP < fromLim)) \
707 return XML_CONVERT_OUTPUT_EXHAUSTED; \
708 else \
709 return res; \
613 } 710 }
614 711
615 #define SET2(ptr, ch) \ 712 #define SET2(ptr, ch) \
616 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) 713 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
617 #define GET_LO(ptr) ((unsigned char)(ptr)[0]) 714 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
618 #define GET_HI(ptr) ((unsigned char)(ptr)[1]) 715 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
619 716
620 DEFINE_UTF16_TO_UTF8(little2_) 717 DEFINE_UTF16_TO_UTF8(little2_)
621 DEFINE_UTF16_TO_UTF16(little2_) 718 DEFINE_UTF16_TO_UTF16(little2_)
622 719
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
719 #if BYTEORDER == 1234 816 #if BYTEORDER == 1234
720 1 817 1
721 #else 818 #else
722 0 819 0
723 #endif 820 #endif
724 }, 821 },
725 { 822 {
726 #include "asciitab.h" 823 #include "asciitab.h"
727 #include "latin1tab.h" 824 #include "latin1tab.h"
728 }, 825 },
729 STANDARD_VTABLE(little2_) 826 STANDARD_VTABLE(little2_) NULL_VTABLE
730 }; 827 };
731 828
732 #endif 829 #endif
733 830
734 static const struct normal_encoding little2_encoding = { 831 static const struct normal_encoding little2_encoding = {
735 { VTABLE, 2, 0, 832 { VTABLE, 2, 0,
736 #if BYTEORDER == 1234 833 #if BYTEORDER == 1234
737 1 834 1
738 #else 835 #else
739 0 836 0
740 #endif 837 #endif
741 }, 838 },
742 { 839 {
743 #define BT_COLON BT_NMSTRT 840 #define BT_COLON BT_NMSTRT
744 #include "asciitab.h" 841 #include "asciitab.h"
745 #undef BT_COLON 842 #undef BT_COLON
746 #include "latin1tab.h" 843 #include "latin1tab.h"
747 }, 844 },
748 STANDARD_VTABLE(little2_) 845 STANDARD_VTABLE(little2_) NULL_VTABLE
749 }; 846 };
750 847
751 #if BYTEORDER != 4321 848 #if BYTEORDER != 4321
752 849
753 #ifdef XML_NS 850 #ifdef XML_NS
754 851
755 static const struct normal_encoding internal_little2_encoding_ns = { 852 static const struct normal_encoding internal_little2_encoding_ns = {
756 { VTABLE, 2, 0, 1 }, 853 { VTABLE, 2, 0, 1 },
757 { 854 {
758 #include "iasciitab.h" 855 #include "iasciitab.h"
759 #include "latin1tab.h" 856 #include "latin1tab.h"
760 }, 857 },
761 STANDARD_VTABLE(little2_) 858 STANDARD_VTABLE(little2_) NULL_VTABLE
762 }; 859 };
763 860
764 #endif 861 #endif
765 862
766 static const struct normal_encoding internal_little2_encoding = { 863 static const struct normal_encoding internal_little2_encoding = {
767 { VTABLE, 2, 0, 1 }, 864 { VTABLE, 2, 0, 1 },
768 { 865 {
769 #define BT_COLON BT_NMSTRT 866 #define BT_COLON BT_NMSTRT
770 #include "iasciitab.h" 867 #include "iasciitab.h"
771 #undef BT_COLON 868 #undef BT_COLON
772 #include "latin1tab.h" 869 #include "latin1tab.h"
773 }, 870 },
774 STANDARD_VTABLE(little2_) 871 STANDARD_VTABLE(little2_) NULL_VTABLE
775 }; 872 };
776 873
777 #endif 874 #endif
778 875
779 876
780 #define BIG2_BYTE_TYPE(enc, p) \ 877 #define BIG2_BYTE_TYPE(enc, p) \
781 ((p)[0] == 0 \ 878 ((p)[0] == 0 \
782 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ 879 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
783 : unicode_byte_type((p)[0], (p)[1])) 880 : unicode_byte_type((p)[0], (p)[1]))
784 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) 881 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
860 #if BYTEORDER == 4321 957 #if BYTEORDER == 4321
861 1 958 1
862 #else 959 #else
863 0 960 0
864 #endif 961 #endif
865 }, 962 },
866 { 963 {
867 #include "asciitab.h" 964 #include "asciitab.h"
868 #include "latin1tab.h" 965 #include "latin1tab.h"
869 }, 966 },
870 STANDARD_VTABLE(big2_) 967 STANDARD_VTABLE(big2_) NULL_VTABLE
871 }; 968 };
872 969
873 #endif 970 #endif
874 971
875 static const struct normal_encoding big2_encoding = { 972 static const struct normal_encoding big2_encoding = {
876 { VTABLE, 2, 0, 973 { VTABLE, 2, 0,
877 #if BYTEORDER == 4321 974 #if BYTEORDER == 4321
878 1 975 1
879 #else 976 #else
880 0 977 0
881 #endif 978 #endif
882 }, 979 },
883 { 980 {
884 #define BT_COLON BT_NMSTRT 981 #define BT_COLON BT_NMSTRT
885 #include "asciitab.h" 982 #include "asciitab.h"
886 #undef BT_COLON 983 #undef BT_COLON
887 #include "latin1tab.h" 984 #include "latin1tab.h"
888 }, 985 },
889 STANDARD_VTABLE(big2_) 986 STANDARD_VTABLE(big2_) NULL_VTABLE
890 }; 987 };
891 988
892 #if BYTEORDER != 1234 989 #if BYTEORDER != 1234
893 990
894 #ifdef XML_NS 991 #ifdef XML_NS
895 992
896 static const struct normal_encoding internal_big2_encoding_ns = { 993 static const struct normal_encoding internal_big2_encoding_ns = {
897 { VTABLE, 2, 0, 1 }, 994 { VTABLE, 2, 0, 1 },
898 { 995 {
899 #include "iasciitab.h" 996 #include "iasciitab.h"
900 #include "latin1tab.h" 997 #include "latin1tab.h"
901 }, 998 },
902 STANDARD_VTABLE(big2_) 999 STANDARD_VTABLE(big2_) NULL_VTABLE
903 }; 1000 };
904 1001
905 #endif 1002 #endif
906 1003
907 static const struct normal_encoding internal_big2_encoding = { 1004 static const struct normal_encoding internal_big2_encoding = {
908 { VTABLE, 2, 0, 1 }, 1005 { VTABLE, 2, 0, 1 },
909 { 1006 {
910 #define BT_COLON BT_NMSTRT 1007 #define BT_COLON BT_NMSTRT
911 #include "iasciitab.h" 1008 #include "iasciitab.h"
912 #undef BT_COLON 1009 #undef BT_COLON
913 #include "latin1tab.h" 1010 #include "latin1tab.h"
914 }, 1011 },
915 STANDARD_VTABLE(big2_) 1012 STANDARD_VTABLE(big2_) NULL_VTABLE
916 }; 1013 };
917 1014
918 #endif 1015 #endif
919 1016
920 #undef PREFIX 1017 #undef PREFIX
921 1018
922 static int FASTCALL 1019 static int FASTCALL
923 streqci(const char *s1, const char *s2) 1020 streqci(const char *s1, const char *s2)
924 { 1021 {
925 for (;;) { 1022 for (;;) {
926 char c1 = *s1++; 1023 char c1 = *s1++;
927 char c2 = *s2++; 1024 char c2 = *s2++;
928 if (ASCII_a <= c1 && c1 <= ASCII_z) 1025 if (ASCII_a <= c1 && c1 <= ASCII_z)
929 c1 += ASCII_A - ASCII_a; 1026 c1 += ASCII_A - ASCII_a;
930 if (ASCII_a <= c2 && c2 <= ASCII_z) 1027 if (ASCII_a <= c2 && c2 <= ASCII_z)
931 c2 += ASCII_A - ASCII_a; 1028 c2 += ASCII_A - ASCII_a;
932 if (c1 != c2) 1029 if (c1 != c2)
933 return 0; 1030 return 0;
934 if (!c1) 1031 if (!c1)
935 break; 1032 break;
936 } 1033 }
937 return 1; 1034 return 1;
938 } 1035 }
939 1036
940 static void PTRCALL 1037 static void PTRCALL
941 initUpdatePosition(const ENCODING *enc, const char *ptr, 1038 initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
942 const char *end, POSITION *pos) 1039 const char *end, POSITION *pos)
943 { 1040 {
944 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); 1041 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
945 } 1042 }
946 1043
947 static int 1044 static int
948 toAscii(const ENCODING *enc, const char *ptr, const char *end) 1045 toAscii(const ENCODING *enc, const char *ptr, const char *end)
949 { 1046 {
950 char buf[1]; 1047 char buf[1];
951 char *p = buf; 1048 char *p = buf;
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
1281 } 1378 }
1282 1379
1283 static int PTRFASTCALL 1380 static int PTRFASTCALL
1284 unknown_isInvalid(const ENCODING *enc, const char *p) 1381 unknown_isInvalid(const ENCODING *enc, const char *p)
1285 { 1382 {
1286 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1383 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1287 int c = uenc->convert(uenc->userData, p); 1384 int c = uenc->convert(uenc->userData, p);
1288 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; 1385 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1289 } 1386 }
1290 1387
1291 static void PTRCALL 1388 static enum XML_Convert_Result PTRCALL
1292 unknown_toUtf8(const ENCODING *enc, 1389 unknown_toUtf8(const ENCODING *enc,
1293 const char **fromP, const char *fromLim, 1390 const char **fromP, const char *fromLim,
1294 char **toP, const char *toLim) 1391 char **toP, const char *toLim)
1295 { 1392 {
1296 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1393 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1297 char buf[XML_UTF8_ENCODE_MAX]; 1394 char buf[XML_UTF8_ENCODE_MAX];
1298 for (;;) { 1395 for (;;) {
1299 const char *utf8; 1396 const char *utf8;
1300 int n; 1397 int n;
1301 if (*fromP == fromLim) 1398 if (*fromP == fromLim)
1302 break; 1399 return XML_CONVERT_COMPLETED;
1303 utf8 = uenc->utf8[(unsigned char)**fromP]; 1400 utf8 = uenc->utf8[(unsigned char)**fromP];
1304 n = *utf8++; 1401 n = *utf8++;
1305 if (n == 0) { 1402 if (n == 0) {
1306 int c = uenc->convert(uenc->userData, *fromP); 1403 int c = uenc->convert(uenc->userData, *fromP);
1307 n = XmlUtf8Encode(c, buf); 1404 n = XmlUtf8Encode(c, buf);
1308 if (n > toLim - *toP) 1405 if (n > toLim - *toP)
1309 break; 1406 return XML_CONVERT_OUTPUT_EXHAUSTED;
1310 utf8 = buf; 1407 utf8 = buf;
1311 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 1408 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1312 - (BT_LEAD2 - 2)); 1409 - (BT_LEAD2 - 2));
1313 } 1410 }
1314 else { 1411 else {
1315 if (n > toLim - *toP) 1412 if (n > toLim - *toP)
1316 break; 1413 return XML_CONVERT_OUTPUT_EXHAUSTED;
1317 (*fromP)++; 1414 (*fromP)++;
1318 } 1415 }
1319 do { 1416 do {
1320 *(*toP)++ = *utf8++; 1417 *(*toP)++ = *utf8++;
1321 } while (--n != 0); 1418 } while (--n != 0);
1322 } 1419 }
1323 } 1420 }
1324 1421
1325 static void PTRCALL 1422 static enum XML_Convert_Result PTRCALL
1326 unknown_toUtf16(const ENCODING *enc, 1423 unknown_toUtf16(const ENCODING *enc,
1327 const char **fromP, const char *fromLim, 1424 const char **fromP, const char *fromLim,
1328 unsigned short **toP, const unsigned short *toLim) 1425 unsigned short **toP, const unsigned short *toLim)
1329 { 1426 {
1330 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1427 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1331 while (*fromP != fromLim && *toP != toLim) { 1428 while (*fromP < fromLim && *toP < toLim) {
1332 unsigned short c = uenc->utf16[(unsigned char)**fromP]; 1429 unsigned short c = uenc->utf16[(unsigned char)**fromP];
1333 if (c == 0) { 1430 if (c == 0) {
1334 c = (unsigned short) 1431 c = (unsigned short)
1335 uenc->convert(uenc->userData, *fromP); 1432 uenc->convert(uenc->userData, *fromP);
1336 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 1433 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1337 - (BT_LEAD2 - 2)); 1434 - (BT_LEAD2 - 2));
1338 } 1435 }
1339 else 1436 else
1340 (*fromP)++; 1437 (*fromP)++;
1341 *(*toP)++ = c; 1438 *(*toP)++ = c;
1342 } 1439 }
1440
1441 if ((*toP == toLim) && (*fromP < fromLim))
1442 return XML_CONVERT_OUTPUT_EXHAUSTED;
1443 else
1444 return XML_CONVERT_COMPLETED;
1343 } 1445 }
1344 1446
1345 ENCODING * 1447 ENCODING *
1346 XmlInitUnknownEncoding(void *mem, 1448 XmlInitUnknownEncoding(void *mem,
1347 int *table, 1449 int *table,
1348 CONVERTER convert, 1450 CONVERTER convert,
1349 void *userData) 1451 void *userData)
1350 { 1452 {
1351 int i; 1453 int i;
1352 struct unknown_encoding *e = (struct unknown_encoding *)mem; 1454 struct unknown_encoding *e = (struct unknown_encoding *)mem;
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after
1496 static int 1598 static int
1497 initScan(const ENCODING * const *encodingTable, 1599 initScan(const ENCODING * const *encodingTable,
1498 const INIT_ENCODING *enc, 1600 const INIT_ENCODING *enc,
1499 int state, 1601 int state,
1500 const char *ptr, 1602 const char *ptr,
1501 const char *end, 1603 const char *end,
1502 const char **nextTokPtr) 1604 const char **nextTokPtr)
1503 { 1605 {
1504 const ENCODING **encPtr; 1606 const ENCODING **encPtr;
1505 1607
1506 if (ptr == end) 1608 if (ptr >= end)
1507 return XML_TOK_NONE; 1609 return XML_TOK_NONE;
1508 encPtr = enc->encPtr; 1610 encPtr = enc->encPtr;
1509 if (ptr + 1 == end) { 1611 if (ptr + 1 == end) {
1510 /* only a single byte available for auto-detection */ 1612 /* only a single byte available for auto-detection */
1511 #ifndef XML_DTD /* FIXME */ 1613 #ifndef XML_DTD /* FIXME */
1512 /* a well-formed document entity must have more than one byte */ 1614 /* a well-formed document entity must have more than one byte */
1513 if (state != XML_CONTENT_STATE) 1615 if (state != XML_CONTENT_STATE)
1514 return XML_TOK_PARTIAL; 1616 return XML_TOK_PARTIAL;
1515 #endif 1617 #endif
1516 /* so we're parsing an external text entity... */ 1618 /* so we're parsing an external text entity... */
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
1642 CONVERTER convert, 1744 CONVERTER convert,
1643 void *userData) 1745 void *userData)
1644 { 1746 {
1645 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); 1747 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1646 if (enc) 1748 if (enc)
1647 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; 1749 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1648 return enc; 1750 return enc;
1649 } 1751 }
1650 1752
1651 #endif /* XML_NS */ 1753 #endif /* XML_NS */
OLDNEW
« no previous file with comments | « third_party/expat/files/lib/xmltok.h ('k') | third_party/expat/files/lib/xmltok.c.origin » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698