Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: third_party/expat/files/lib/xmltok.c.origin

Issue 2761253002: Update expat to 2.2.0 to fix CVE vulnerability. (Closed)
Patch Set: update README.chromium Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/expat/files/lib/xmltok.c ('k') | third_party/expat/files/lib/xmltok_impl.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission. 2 See the file COPYING for copying permission.
3 */ 3 */
4 4
5 #include <stddef.h> 5 #include <stddef.h>
6 6
7 #ifdef COMPILED_FROM_DSP 7 #ifdef WIN32
8 #include "winconfig.h" 8 #include "winconfig.h"
9 #elif defined(MACOS_CLASSIC) 9 #elif defined(MACOS_CLASSIC)
10 #include "macconfig.h" 10 #include "macconfig.h"
11 #elif defined(__amigaos__) 11 #elif defined(__amigaos__)
12 #include "amigaconfig.h" 12 #include "amigaconfig.h"
13 #elif defined(__WATCOMC__) 13 #elif defined(__WATCOMC__)
14 #include "watcomconfig.h" 14 #include "watcomconfig.h"
15 #else 15 #else
16 #ifdef HAVE_EXPAT_CONFIG_H 16 #ifdef HAVE_EXPAT_CONFIG_H
17 #include <expat_config.h> 17 #include <expat_config.h>
18 #endif 18 #endif
19 #endif /* ndef COMPILED_FROM_DSP */ 19 #endif /* ndef WIN32 */
20 20
21 #include "expat_external.h" 21 #include "expat_external.h"
22 #include "internal.h" 22 #include "internal.h"
23 #include "xmltok.h" 23 #include "xmltok.h"
24 #include "nametab.h" 24 #include "nametab.h"
25 25
26 #ifdef XML_DTD 26 #ifdef XML_DTD
27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) 27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
28 #else 28 #else
29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */ 29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */
30 #endif 30 #endif
31 31
32 #define VTABLE1 \ 32 #define VTABLE1 \
33 { PREFIX(prologTok), PREFIX(contentTok), \ 33 { PREFIX(prologTok), PREFIX(contentTok), \
34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ 34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ 35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
36 PREFIX(sameName), \ 36 PREFIX(sameName), \
37 PREFIX(nameMatchesAscii), \ 37 PREFIX(nameMatchesAscii), \
38 PREFIX(nameLength), \ 38 PREFIX(nameLength), \
39 PREFIX(skipS), \ 39 PREFIX(skipS), \
40 PREFIX(getAtts), \ 40 PREFIX(getAtts), \
41 PREFIX(charRefNumber), \ 41 PREFIX(charRefNumber), \
42 PREFIX(predefinedEntityName), \ 42 PREFIX(predefinedEntityName), \
43 PREFIX(updatePosition), \ 43 PREFIX(updatePosition), \
44 PREFIX(isPublicId) 44 PREFIX(isPublicId)
45 45
46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) 46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
47 47
48 #define UCS2_GET_NAMING(pages, hi, lo) \ 48 #define UCS2_GET_NAMING(pages, hi, lo) \
49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) 49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
50 50
51 /* A 2 byte UTF-8 representation splits the characters 11 bits between 51 /* A 2 byte UTF-8 representation splits the characters 11 bits between
52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into 52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
53 pages, 3 bits to add to that index and 5 bits to generate the mask. 53 pages, 3 bits to add to that index and 5 bits to generate the mask.
54 */ 54 */
55 #define UTF8_GET_NAMING2(pages, byte) \ 55 #define UTF8_GET_NAMING2(pages, byte) \
56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ 56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
57 + ((((byte)[0]) & 3) << 1) \ 57 + ((((byte)[0]) & 3) << 1) \
58 + ((((byte)[1]) >> 5) & 1)] \ 58 + ((((byte)[1]) >> 5) & 1)] \
59 & (1 << (((byte)[1]) & 0x1F))) 59 & (1u << (((byte)[1]) & 0x1F)))
60 60
61 /* A 3 byte UTF-8 representation splits the characters 16 bits between 61 /* A 3 byte UTF-8 representation splits the characters 16 bits between
62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index 62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index
63 into pages, 3 bits to add to that index and 5 bits to generate the 63 into pages, 3 bits to add to that index and 5 bits to generate the
64 mask. 64 mask.
65 */ 65 */
66 #define UTF8_GET_NAMING3(pages, byte) \ 66 #define UTF8_GET_NAMING3(pages, byte) \
67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ 67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
68 + ((((byte)[1]) >> 2) & 0xF)] \ 68 + ((((byte)[1]) >> 2) & 0xF)] \
69 << 3) \ 69 << 3) \
70 + ((((byte)[1]) & 3) << 1) \ 70 + ((((byte)[1]) & 3) << 1) \
71 + ((((byte)[2]) >> 5) & 1)] \ 71 + ((((byte)[2]) >> 5) & 1)] \
72 & (1 << (((byte)[2]) & 0x1F))) 72 & (1u << (((byte)[2]) & 0x1F)))
73 73
74 #define UTF8_GET_NAMING(pages, p, n) \ 74 #define UTF8_GET_NAMING(pages, p, n) \
75 ((n) == 2 \ 75 ((n) == 2 \
76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ 76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
77 : ((n) == 3 \ 77 : ((n) == 3 \
78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ 78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
79 : 0)) 79 : 0))
80 80
81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B 81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ 82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 || \ 115 || \
116 ((*p) == 0xF0 \ 116 ((*p) == 0xF0 \
117 ? \ 117 ? \
118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ 118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
119 : \ 119 : \
120 ((p)[1] & 0x80) == 0 \ 120 ((p)[1] & 0x80) == 0 \
121 || \ 121 || \
122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) 122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
123 123
124 static int PTRFASTCALL 124 static int PTRFASTCALL
125 isNever(const ENCODING *enc, const char *p) 125 isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
126 { 126 {
127 return 0; 127 return 0;
128 } 128 }
129 129
130 static int PTRFASTCALL 130 static int PTRFASTCALL
131 utf8_isName2(const ENCODING *enc, const char *p) 131 utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
132 { 132 {
133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); 133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
134 } 134 }
135 135
136 static int PTRFASTCALL 136 static int PTRFASTCALL
137 utf8_isName3(const ENCODING *enc, const char *p) 137 utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
138 { 138 {
139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); 139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
140 } 140 }
141 141
142 #define utf8_isName4 isNever 142 #define utf8_isName4 isNever
143 143
144 static int PTRFASTCALL 144 static int PTRFASTCALL
145 utf8_isNmstrt2(const ENCODING *enc, const char *p) 145 utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
146 { 146 {
147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); 147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
148 } 148 }
149 149
150 static int PTRFASTCALL 150 static int PTRFASTCALL
151 utf8_isNmstrt3(const ENCODING *enc, const char *p) 151 utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
152 { 152 {
153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); 153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
154 } 154 }
155 155
156 #define utf8_isNmstrt4 isNever 156 #define utf8_isNmstrt4 isNever
157 157
158 static int PTRFASTCALL 158 static int PTRFASTCALL
159 utf8_isInvalid2(const ENCODING *enc, const char *p) 159 utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
160 { 160 {
161 return UTF8_INVALID2((const unsigned char *)p); 161 return UTF8_INVALID2((const unsigned char *)p);
162 } 162 }
163 163
164 static int PTRFASTCALL 164 static int PTRFASTCALL
165 utf8_isInvalid3(const ENCODING *enc, const char *p) 165 utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
166 { 166 {
167 return UTF8_INVALID3((const unsigned char *)p); 167 return UTF8_INVALID3((const unsigned char *)p);
168 } 168 }
169 169
170 static int PTRFASTCALL 170 static int PTRFASTCALL
171 utf8_isInvalid4(const ENCODING *enc, const char *p) 171 utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
172 { 172 {
173 return UTF8_INVALID4((const unsigned char *)p); 173 return UTF8_INVALID4((const unsigned char *)p);
174 } 174 }
175 175
176 struct normal_encoding { 176 struct normal_encoding {
177 ENCODING enc; 177 ENCODING enc;
178 unsigned char type[256]; 178 unsigned char type[256];
179 #ifdef XML_MIN_SIZE 179 #ifdef XML_MIN_SIZE
180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *); 180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); 181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
215 E ## isName2, \ 215 E ## isName2, \
216 E ## isName3, \ 216 E ## isName3, \
217 E ## isName4, \ 217 E ## isName4, \
218 E ## isNmstrt2, \ 218 E ## isNmstrt2, \
219 E ## isNmstrt3, \ 219 E ## isNmstrt3, \
220 E ## isNmstrt4, \ 220 E ## isNmstrt4, \
221 E ## isInvalid2, \ 221 E ## isInvalid2, \
222 E ## isInvalid3, \ 222 E ## isInvalid3, \
223 E ## isInvalid4 223 E ## isInvalid4
224 224
225 #define NULL_VTABLE \
226 /* isName2 */ NULL, \
227 /* isName3 */ NULL, \
228 /* isName4 */ NULL, \
229 /* isNmstrt2 */ NULL, \
230 /* isNmstrt3 */ NULL, \
231 /* isNmstrt4 */ NULL, \
232 /* isInvalid2 */ NULL, \
233 /* isInvalid3 */ NULL, \
234 /* isInvalid4 */ NULL
235
225 static int FASTCALL checkCharRefNumber(int); 236 static int FASTCALL checkCharRefNumber(int);
226 237
227 #include "xmltok_impl.h" 238 #include "xmltok_impl.h"
228 #include "ascii.h" 239 #include "ascii.h"
229 240
230 #ifdef XML_MIN_SIZE 241 #ifdef XML_MIN_SIZE
231 #define sb_isNameMin isNever 242 #define sb_isNameMin isNever
232 #define sb_isNmstrtMin isNever 243 #define sb_isNmstrtMin isNever
233 #endif 244 #endif
234 245
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
311 #undef IS_NMSTRT_CHAR_MINBPC 322 #undef IS_NMSTRT_CHAR_MINBPC
312 #undef IS_INVALID_CHAR 323 #undef IS_INVALID_CHAR
313 324
314 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ 325 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
315 UTF8_cval1 = 0x00, 326 UTF8_cval1 = 0x00,
316 UTF8_cval2 = 0xc0, 327 UTF8_cval2 = 0xc0,
317 UTF8_cval3 = 0xe0, 328 UTF8_cval3 = 0xe0,
318 UTF8_cval4 = 0xf0 329 UTF8_cval4 = 0xf0
319 }; 330 };
320 331
321 static void PTRCALL 332 void
322 utf8_toUtf8(const ENCODING *enc, 333 align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
334 {
335 const char * fromLim = *fromLimRef;
336 size_t walked = 0;
337 for (; fromLim > from; fromLim--, walked++) {
338 const unsigned char prev = (unsigned char)fromLim[-1];
339 if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
340 if (walked + 1 >= 4) {
341 fromLim += 4 - 1;
342 break;
343 } else {
344 walked = 0;
345 }
346 } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
347 if (walked + 1 >= 3) {
348 fromLim += 3 - 1;
349 break;
350 } else {
351 walked = 0;
352 }
353 } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
354 if (walked + 1 >= 2) {
355 fromLim += 2 - 1;
356 break;
357 } else {
358 walked = 0;
359 }
360 } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
361 break;
362 }
363 }
364 *fromLimRef = fromLim;
365 }
366
367 static enum XML_Convert_Result PTRCALL
368 utf8_toUtf8(const ENCODING *UNUSED_P(enc),
323 const char **fromP, const char *fromLim, 369 const char **fromP, const char *fromLim,
324 char **toP, const char *toLim) 370 char **toP, const char *toLim)
325 { 371 {
372 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
326 char *to; 373 char *to;
327 const char *from; 374 const char *from;
328 if (fromLim - *fromP > toLim - *toP) { 375 if (fromLim - *fromP > toLim - *toP) {
329 /* Avoid copying partial characters. */ 376 /* Avoid copying partial characters. */
330 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) 377 res = XML_CONVERT_OUTPUT_EXHAUSTED;
331 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) 378 fromLim = *fromP + (toLim - *toP);
332 break; 379 align_limit_to_full_utf8_characters(*fromP, &fromLim);
333 } 380 }
334 for (to = *toP, from = *fromP; from != fromLim; from++, to++) 381 for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
335 *to = *from; 382 *to = *from;
336 *fromP = from; 383 *fromP = from;
337 *toP = to; 384 *toP = to;
385
386 if ((to == toLim) && (from < fromLim))
387 return XML_CONVERT_OUTPUT_EXHAUSTED;
388 else
389 return res;
338 } 390 }
339 391
340 static void PTRCALL 392 static enum XML_Convert_Result PTRCALL
341 utf8_toUtf16(const ENCODING *enc, 393 utf8_toUtf16(const ENCODING *enc,
342 const char **fromP, const char *fromLim, 394 const char **fromP, const char *fromLim,
343 unsigned short **toP, const unsigned short *toLim) 395 unsigned short **toP, const unsigned short *toLim)
344 { 396 {
397 enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
345 unsigned short *to = *toP; 398 unsigned short *to = *toP;
346 const char *from = *fromP; 399 const char *from = *fromP;
347 while (from != fromLim && to != toLim) { 400 while (from < fromLim && to < toLim) {
348 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { 401 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
349 case BT_LEAD2: 402 case BT_LEAD2:
403 if (fromLim - from < 2) {
404 res = XML_CONVERT_INPUT_INCOMPLETE;
405 break;
406 }
350 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); 407 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
351 from += 2; 408 from += 2;
352 break; 409 break;
353 case BT_LEAD3: 410 case BT_LEAD3:
411 if (fromLim - from < 3) {
412 res = XML_CONVERT_INPUT_INCOMPLETE;
413 break;
414 }
354 *to++ = (unsigned short)(((from[0] & 0xf) << 12) 415 *to++ = (unsigned short)(((from[0] & 0xf) << 12)
355 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); 416 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
356 from += 3; 417 from += 3;
357 break; 418 break;
358 case BT_LEAD4: 419 case BT_LEAD4:
359 { 420 {
360 unsigned long n; 421 unsigned long n;
361 if (to + 1 == toLim) 422 if (toLim - to < 2) {
423 res = XML_CONVERT_OUTPUT_EXHAUSTED;
362 goto after; 424 goto after;
425 }
426 if (fromLim - from < 4) {
427 res = XML_CONVERT_INPUT_INCOMPLETE;
428 goto after;
429 }
363 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) 430 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
364 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); 431 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
365 n -= 0x10000; 432 n -= 0x10000;
366 to[0] = (unsigned short)((n >> 10) | 0xD800); 433 to[0] = (unsigned short)((n >> 10) | 0xD800);
367 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); 434 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
368 to += 2; 435 to += 2;
369 from += 4; 436 from += 4;
370 } 437 }
371 break; 438 break;
372 default: 439 default:
373 *to++ = *from++; 440 *to++ = *from++;
374 break; 441 break;
375 } 442 }
376 } 443 }
377 after: 444 after:
378 *fromP = from; 445 *fromP = from;
379 *toP = to; 446 *toP = to;
447 return res;
380 } 448 }
381 449
382 #ifdef XML_NS 450 #ifdef XML_NS
383 static const struct normal_encoding utf8_encoding_ns = { 451 static const struct normal_encoding utf8_encoding_ns = {
384 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 452 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
385 { 453 {
386 #include "asciitab.h" 454 #include "asciitab.h"
387 #include "utf8tab.h" 455 #include "utf8tab.h"
388 }, 456 },
389 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 457 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
(...skipping 28 matching lines...) Expand all
418 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 486 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
419 { 487 {
420 #define BT_COLON BT_NMSTRT 488 #define BT_COLON BT_NMSTRT
421 #include "iasciitab.h" 489 #include "iasciitab.h"
422 #undef BT_COLON 490 #undef BT_COLON
423 #include "utf8tab.h" 491 #include "utf8tab.h"
424 }, 492 },
425 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 493 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
426 }; 494 };
427 495
428 static void PTRCALL 496 static enum XML_Convert_Result PTRCALL
429 latin1_toUtf8(const ENCODING *enc, 497 latin1_toUtf8(const ENCODING *UNUSED_P(enc),
430 const char **fromP, const char *fromLim, 498 const char **fromP, const char *fromLim,
431 char **toP, const char *toLim) 499 char **toP, const char *toLim)
432 { 500 {
433 for (;;) { 501 for (;;) {
434 unsigned char c; 502 unsigned char c;
435 if (*fromP == fromLim) 503 if (*fromP == fromLim)
436 break; 504 return XML_CONVERT_COMPLETED;
437 c = (unsigned char)**fromP; 505 c = (unsigned char)**fromP;
438 if (c & 0x80) { 506 if (c & 0x80) {
439 if (toLim - *toP < 2) 507 if (toLim - *toP < 2)
440 break; 508 return XML_CONVERT_OUTPUT_EXHAUSTED;
441 *(*toP)++ = (char)((c >> 6) | UTF8_cval2); 509 *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
442 *(*toP)++ = (char)((c & 0x3f) | 0x80); 510 *(*toP)++ = (char)((c & 0x3f) | 0x80);
443 (*fromP)++; 511 (*fromP)++;
444 } 512 }
445 else { 513 else {
446 if (*toP == toLim) 514 if (*toP == toLim)
447 break; 515 return XML_CONVERT_OUTPUT_EXHAUSTED;
448 *(*toP)++ = *(*fromP)++; 516 *(*toP)++ = *(*fromP)++;
449 } 517 }
450 } 518 }
451 } 519 }
452 520
453 static void PTRCALL 521 static enum XML_Convert_Result PTRCALL
454 latin1_toUtf16(const ENCODING *enc, 522 latin1_toUtf16(const ENCODING *UNUSED_P(enc),
455 const char **fromP, const char *fromLim, 523 const char **fromP, const char *fromLim,
456 unsigned short **toP, const unsigned short *toLim) 524 unsigned short **toP, const unsigned short *toLim)
457 { 525 {
458 while (*fromP != fromLim && *toP != toLim) 526 while (*fromP < fromLim && *toP < toLim)
459 *(*toP)++ = (unsigned char)*(*fromP)++; 527 *(*toP)++ = (unsigned char)*(*fromP)++;
528
529 if ((*toP == toLim) && (*fromP < fromLim))
530 return XML_CONVERT_OUTPUT_EXHAUSTED;
531 else
532 return XML_CONVERT_COMPLETED;
460 } 533 }
461 534
462 #ifdef XML_NS 535 #ifdef XML_NS
463 536
464 static const struct normal_encoding latin1_encoding_ns = { 537 static const struct normal_encoding latin1_encoding_ns = {
465 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 538 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
466 { 539 {
467 #include "asciitab.h" 540 #include "asciitab.h"
468 #include "latin1tab.h" 541 #include "latin1tab.h"
469 }, 542 },
470 STANDARD_VTABLE(sb_) 543 STANDARD_VTABLE(sb_) NULL_VTABLE
471 }; 544 };
472 545
473 #endif 546 #endif
474 547
475 static const struct normal_encoding latin1_encoding = { 548 static const struct normal_encoding latin1_encoding = {
476 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 549 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
477 { 550 {
478 #define BT_COLON BT_NMSTRT 551 #define BT_COLON BT_NMSTRT
479 #include "asciitab.h" 552 #include "asciitab.h"
480 #undef BT_COLON 553 #undef BT_COLON
481 #include "latin1tab.h" 554 #include "latin1tab.h"
482 }, 555 },
483 STANDARD_VTABLE(sb_) 556 STANDARD_VTABLE(sb_) NULL_VTABLE
484 }; 557 };
485 558
486 static void PTRCALL 559 static enum XML_Convert_Result PTRCALL
487 ascii_toUtf8(const ENCODING *enc, 560 ascii_toUtf8(const ENCODING *UNUSED_P(enc),
488 const char **fromP, const char *fromLim, 561 const char **fromP, const char *fromLim,
489 char **toP, const char *toLim) 562 char **toP, const char *toLim)
490 { 563 {
491 while (*fromP != fromLim && *toP != toLim) 564 while (*fromP < fromLim && *toP < toLim)
492 *(*toP)++ = *(*fromP)++; 565 *(*toP)++ = *(*fromP)++;
566
567 if ((*toP == toLim) && (*fromP < fromLim))
568 return XML_CONVERT_OUTPUT_EXHAUSTED;
569 else
570 return XML_CONVERT_COMPLETED;
493 } 571 }
494 572
495 #ifdef XML_NS 573 #ifdef XML_NS
496 574
497 static const struct normal_encoding ascii_encoding_ns = { 575 static const struct normal_encoding ascii_encoding_ns = {
498 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 576 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
499 { 577 {
500 #include "asciitab.h" 578 #include "asciitab.h"
501 /* BT_NONXML == 0 */ 579 /* BT_NONXML == 0 */
502 }, 580 },
503 STANDARD_VTABLE(sb_) 581 STANDARD_VTABLE(sb_) NULL_VTABLE
504 }; 582 };
505 583
506 #endif 584 #endif
507 585
508 static const struct normal_encoding ascii_encoding = { 586 static const struct normal_encoding ascii_encoding = {
509 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 587 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
510 { 588 {
511 #define BT_COLON BT_NMSTRT 589 #define BT_COLON BT_NMSTRT
512 #include "asciitab.h" 590 #include "asciitab.h"
513 #undef BT_COLON 591 #undef BT_COLON
514 /* BT_NONXML == 0 */ 592 /* BT_NONXML == 0 */
515 }, 593 },
516 STANDARD_VTABLE(sb_) 594 STANDARD_VTABLE(sb_) NULL_VTABLE
517 }; 595 };
518 596
519 static int PTRFASTCALL 597 static int PTRFASTCALL
520 unicode_byte_type(char hi, char lo) 598 unicode_byte_type(char hi, char lo)
521 { 599 {
522 switch ((unsigned char)hi) { 600 switch ((unsigned char)hi) {
523 case 0xD8: case 0xD9: case 0xDA: case 0xDB: 601 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
524 return BT_LEAD4; 602 return BT_LEAD4;
525 case 0xDC: case 0xDD: case 0xDE: case 0xDF: 603 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
526 return BT_TRAIL; 604 return BT_TRAIL;
527 case 0xFF: 605 case 0xFF:
528 switch ((unsigned char)lo) { 606 switch ((unsigned char)lo) {
529 case 0xFF: 607 case 0xFF:
530 case 0xFE: 608 case 0xFE:
531 return BT_NONXML; 609 return BT_NONXML;
532 } 610 }
533 break; 611 break;
534 } 612 }
535 return BT_NONASCII; 613 return BT_NONASCII;
536 } 614 }
537 615
538 #define DEFINE_UTF16_TO_UTF8(E) \ 616 #define DEFINE_UTF16_TO_UTF8(E) \
539 static void PTRCALL \ 617 static enum XML_Convert_Result PTRCALL \
540 E ## toUtf8(const ENCODING *enc, \ 618 E ## toUtf8(const ENCODING *UNUSED_P(enc), \
541 const char **fromP, const char *fromLim, \ 619 const char **fromP, const char *fromLim, \
542 char **toP, const char *toLim) \ 620 char **toP, const char *toLim) \
543 { \ 621 { \
544 const char *from; \ 622 const char *from = *fromP; \
545 for (from = *fromP; from != fromLim; from += 2) { \ 623 fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
624 for (; from < fromLim; from += 2) { \
546 int plane; \ 625 int plane; \
547 unsigned char lo2; \ 626 unsigned char lo2; \
548 unsigned char lo = GET_LO(from); \ 627 unsigned char lo = GET_LO(from); \
549 unsigned char hi = GET_HI(from); \ 628 unsigned char hi = GET_HI(from); \
550 switch (hi) { \ 629 switch (hi) { \
551 case 0: \ 630 case 0: \
552 if (lo < 0x80) { \ 631 if (lo < 0x80) { \
553 if (*toP == toLim) { \ 632 if (*toP == toLim) { \
554 *fromP = from; \ 633 *fromP = from; \
555 return; \ 634 return XML_CONVERT_OUTPUT_EXHAUSTED; \
556 } \ 635 } \
557 *(*toP)++ = lo; \ 636 *(*toP)++ = lo; \
558 break; \ 637 break; \
559 } \ 638 } \
560 /* fall through */ \ 639 /* fall through */ \
561 case 0x1: case 0x2: case 0x3: \ 640 case 0x1: case 0x2: case 0x3: \
562 case 0x4: case 0x5: case 0x6: case 0x7: \ 641 case 0x4: case 0x5: case 0x6: case 0x7: \
563 if (toLim - *toP < 2) { \ 642 if (toLim - *toP < 2) { \
564 *fromP = from; \ 643 *fromP = from; \
565 return; \ 644 return XML_CONVERT_OUTPUT_EXHAUSTED; \
566 } \ 645 } \
567 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ 646 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
568 *(*toP)++ = ((lo & 0x3f) | 0x80); \ 647 *(*toP)++ = ((lo & 0x3f) | 0x80); \
569 break; \ 648 break; \
570 default: \ 649 default: \
571 if (toLim - *toP < 3) { \ 650 if (toLim - *toP < 3) { \
572 *fromP = from; \ 651 *fromP = from; \
573 return; \ 652 return XML_CONVERT_OUTPUT_EXHAUSTED; \
574 } \ 653 } \
575 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ 654 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
576 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ 655 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
577 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ 656 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
578 *(*toP)++ = ((lo & 0x3f) | 0x80); \ 657 *(*toP)++ = ((lo & 0x3f) | 0x80); \
579 break; \ 658 break; \
580 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ 659 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
581 if (toLim - *toP < 4) { \ 660 if (toLim - *toP < 4) { \
582 *fromP = from; \ 661 *fromP = from; \
583 return; \ 662 return XML_CONVERT_OUTPUT_EXHAUSTED; \
663 } \
664 if (fromLim - from < 4) { \
665 *fromP = from; \
666 return XML_CONVERT_INPUT_INCOMPLETE; \
584 } \ 667 } \
585 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ 668 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
586 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ 669 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
587 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ 670 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
588 from += 2; \ 671 from += 2; \
589 lo2 = GET_LO(from); \ 672 lo2 = GET_LO(from); \
590 *(*toP)++ = (((lo & 0x3) << 4) \ 673 *(*toP)++ = (((lo & 0x3) << 4) \
591 | ((GET_HI(from) & 0x3) << 2) \ 674 | ((GET_HI(from) & 0x3) << 2) \
592 | (lo2 >> 6) \ 675 | (lo2 >> 6) \
593 | 0x80); \ 676 | 0x80); \
594 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ 677 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
595 break; \ 678 break; \
596 } \ 679 } \
597 } \ 680 } \
598 *fromP = from; \ 681 *fromP = from; \
682 if (from < fromLim) \
683 return XML_CONVERT_INPUT_INCOMPLETE; \
684 else \
685 return XML_CONVERT_COMPLETED; \
599 } 686 }
600 687
601 #define DEFINE_UTF16_TO_UTF16(E) \ 688 #define DEFINE_UTF16_TO_UTF16(E) \
602 static void PTRCALL \ 689 static enum XML_Convert_Result PTRCALL \
603 E ## toUtf16(const ENCODING *enc, \ 690 E ## toUtf16(const ENCODING *UNUSED_P(enc), \
604 const char **fromP, const char *fromLim, \ 691 const char **fromP, const char *fromLim, \
605 unsigned short **toP, const unsigned short *toLim) \ 692 unsigned short **toP, const unsigned short *toLim) \
606 { \ 693 { \
694 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
695 fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
607 /* Avoid copying first half only of surrogate */ \ 696 /* Avoid copying first half only of surrogate */ \
608 if (fromLim - *fromP > ((toLim - *toP) << 1) \ 697 if (fromLim - *fromP > ((toLim - *toP) << 1) \
609 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ 698 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
610 fromLim -= 2; \ 699 fromLim -= 2; \
611 for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ 700 res = XML_CONVERT_INPUT_INCOMPLETE; \
701 } \
702 for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
612 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ 703 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
704 if ((*toP == toLim) && (*fromP < fromLim)) \
705 return XML_CONVERT_OUTPUT_EXHAUSTED; \
706 else \
707 return res; \
613 } 708 }
614 709
615 #define SET2(ptr, ch) \ 710 #define SET2(ptr, ch) \
616 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) 711 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
617 #define GET_LO(ptr) ((unsigned char)(ptr)[0]) 712 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
618 #define GET_HI(ptr) ((unsigned char)(ptr)[1]) 713 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
619 714
620 DEFINE_UTF16_TO_UTF8(little2_) 715 DEFINE_UTF16_TO_UTF8(little2_)
621 DEFINE_UTF16_TO_UTF16(little2_) 716 DEFINE_UTF16_TO_UTF16(little2_)
622 717
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
719 #if BYTEORDER == 1234 814 #if BYTEORDER == 1234
720 1 815 1
721 #else 816 #else
722 0 817 0
723 #endif 818 #endif
724 }, 819 },
725 { 820 {
726 #include "asciitab.h" 821 #include "asciitab.h"
727 #include "latin1tab.h" 822 #include "latin1tab.h"
728 }, 823 },
729 STANDARD_VTABLE(little2_) 824 STANDARD_VTABLE(little2_) NULL_VTABLE
730 }; 825 };
731 826
732 #endif 827 #endif
733 828
734 static const struct normal_encoding little2_encoding = { 829 static const struct normal_encoding little2_encoding = {
735 { VTABLE, 2, 0, 830 { VTABLE, 2, 0,
736 #if BYTEORDER == 1234 831 #if BYTEORDER == 1234
737 1 832 1
738 #else 833 #else
739 0 834 0
740 #endif 835 #endif
741 }, 836 },
742 { 837 {
743 #define BT_COLON BT_NMSTRT 838 #define BT_COLON BT_NMSTRT
744 #include "asciitab.h" 839 #include "asciitab.h"
745 #undef BT_COLON 840 #undef BT_COLON
746 #include "latin1tab.h" 841 #include "latin1tab.h"
747 }, 842 },
748 STANDARD_VTABLE(little2_) 843 STANDARD_VTABLE(little2_) NULL_VTABLE
749 }; 844 };
750 845
751 #if BYTEORDER != 4321 846 #if BYTEORDER != 4321
752 847
753 #ifdef XML_NS 848 #ifdef XML_NS
754 849
755 static const struct normal_encoding internal_little2_encoding_ns = { 850 static const struct normal_encoding internal_little2_encoding_ns = {
756 { VTABLE, 2, 0, 1 }, 851 { VTABLE, 2, 0, 1 },
757 { 852 {
758 #include "iasciitab.h" 853 #include "iasciitab.h"
759 #include "latin1tab.h" 854 #include "latin1tab.h"
760 }, 855 },
761 STANDARD_VTABLE(little2_) 856 STANDARD_VTABLE(little2_) NULL_VTABLE
762 }; 857 };
763 858
764 #endif 859 #endif
765 860
766 static const struct normal_encoding internal_little2_encoding = { 861 static const struct normal_encoding internal_little2_encoding = {
767 { VTABLE, 2, 0, 1 }, 862 { VTABLE, 2, 0, 1 },
768 { 863 {
769 #define BT_COLON BT_NMSTRT 864 #define BT_COLON BT_NMSTRT
770 #include "iasciitab.h" 865 #include "iasciitab.h"
771 #undef BT_COLON 866 #undef BT_COLON
772 #include "latin1tab.h" 867 #include "latin1tab.h"
773 }, 868 },
774 STANDARD_VTABLE(little2_) 869 STANDARD_VTABLE(little2_) NULL_VTABLE
775 }; 870 };
776 871
777 #endif 872 #endif
778 873
779 874
780 #define BIG2_BYTE_TYPE(enc, p) \ 875 #define BIG2_BYTE_TYPE(enc, p) \
781 ((p)[0] == 0 \ 876 ((p)[0] == 0 \
782 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ 877 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
783 : unicode_byte_type((p)[0], (p)[1])) 878 : unicode_byte_type((p)[0], (p)[1]))
784 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) 879 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
860 #if BYTEORDER == 4321 955 #if BYTEORDER == 4321
861 1 956 1
862 #else 957 #else
863 0 958 0
864 #endif 959 #endif
865 }, 960 },
866 { 961 {
867 #include "asciitab.h" 962 #include "asciitab.h"
868 #include "latin1tab.h" 963 #include "latin1tab.h"
869 }, 964 },
870 STANDARD_VTABLE(big2_) 965 STANDARD_VTABLE(big2_) NULL_VTABLE
871 }; 966 };
872 967
873 #endif 968 #endif
874 969
875 static const struct normal_encoding big2_encoding = { 970 static const struct normal_encoding big2_encoding = {
876 { VTABLE, 2, 0, 971 { VTABLE, 2, 0,
877 #if BYTEORDER == 4321 972 #if BYTEORDER == 4321
878 1 973 1
879 #else 974 #else
880 0 975 0
881 #endif 976 #endif
882 }, 977 },
883 { 978 {
884 #define BT_COLON BT_NMSTRT 979 #define BT_COLON BT_NMSTRT
885 #include "asciitab.h" 980 #include "asciitab.h"
886 #undef BT_COLON 981 #undef BT_COLON
887 #include "latin1tab.h" 982 #include "latin1tab.h"
888 }, 983 },
889 STANDARD_VTABLE(big2_) 984 STANDARD_VTABLE(big2_) NULL_VTABLE
890 }; 985 };
891 986
892 #if BYTEORDER != 1234 987 #if BYTEORDER != 1234
893 988
894 #ifdef XML_NS 989 #ifdef XML_NS
895 990
896 static const struct normal_encoding internal_big2_encoding_ns = { 991 static const struct normal_encoding internal_big2_encoding_ns = {
897 { VTABLE, 2, 0, 1 }, 992 { VTABLE, 2, 0, 1 },
898 { 993 {
899 #include "iasciitab.h" 994 #include "iasciitab.h"
900 #include "latin1tab.h" 995 #include "latin1tab.h"
901 }, 996 },
902 STANDARD_VTABLE(big2_) 997 STANDARD_VTABLE(big2_) NULL_VTABLE
903 }; 998 };
904 999
905 #endif 1000 #endif
906 1001
907 static const struct normal_encoding internal_big2_encoding = { 1002 static const struct normal_encoding internal_big2_encoding = {
908 { VTABLE, 2, 0, 1 }, 1003 { VTABLE, 2, 0, 1 },
909 { 1004 {
910 #define BT_COLON BT_NMSTRT 1005 #define BT_COLON BT_NMSTRT
911 #include "iasciitab.h" 1006 #include "iasciitab.h"
912 #undef BT_COLON 1007 #undef BT_COLON
913 #include "latin1tab.h" 1008 #include "latin1tab.h"
914 }, 1009 },
915 STANDARD_VTABLE(big2_) 1010 STANDARD_VTABLE(big2_) NULL_VTABLE
916 }; 1011 };
917 1012
918 #endif 1013 #endif
919 1014
920 #undef PREFIX 1015 #undef PREFIX
921 1016
922 static int FASTCALL 1017 static int FASTCALL
923 streqci(const char *s1, const char *s2) 1018 streqci(const char *s1, const char *s2)
924 { 1019 {
925 for (;;) { 1020 for (;;) {
926 char c1 = *s1++; 1021 char c1 = *s1++;
927 char c2 = *s2++; 1022 char c2 = *s2++;
928 if (ASCII_a <= c1 && c1 <= ASCII_z) 1023 if (ASCII_a <= c1 && c1 <= ASCII_z)
929 c1 += ASCII_A - ASCII_a; 1024 c1 += ASCII_A - ASCII_a;
930 if (ASCII_a <= c2 && c2 <= ASCII_z) 1025 if (ASCII_a <= c2 && c2 <= ASCII_z)
931 c2 += ASCII_A - ASCII_a; 1026 c2 += ASCII_A - ASCII_a;
932 if (c1 != c2) 1027 if (c1 != c2)
933 return 0; 1028 return 0;
934 if (!c1) 1029 if (!c1)
935 break; 1030 break;
936 } 1031 }
937 return 1; 1032 return 1;
938 } 1033 }
939 1034
940 static void PTRCALL 1035 static void PTRCALL
941 initUpdatePosition(const ENCODING *enc, const char *ptr, 1036 initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
942 const char *end, POSITION *pos) 1037 const char *end, POSITION *pos)
943 { 1038 {
944 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); 1039 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
945 } 1040 }
946 1041
947 static int 1042 static int
948 toAscii(const ENCODING *enc, const char *ptr, const char *end) 1043 toAscii(const ENCODING *enc, const char *ptr, const char *end)
949 { 1044 {
950 char buf[1]; 1045 char buf[1];
951 char *p = buf; 1046 char *p = buf;
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
1281 } 1376 }
1282 1377
1283 static int PTRFASTCALL 1378 static int PTRFASTCALL
1284 unknown_isInvalid(const ENCODING *enc, const char *p) 1379 unknown_isInvalid(const ENCODING *enc, const char *p)
1285 { 1380 {
1286 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1381 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1287 int c = uenc->convert(uenc->userData, p); 1382 int c = uenc->convert(uenc->userData, p);
1288 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; 1383 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1289 } 1384 }
1290 1385
1291 static void PTRCALL 1386 static enum XML_Convert_Result PTRCALL
1292 unknown_toUtf8(const ENCODING *enc, 1387 unknown_toUtf8(const ENCODING *enc,
1293 const char **fromP, const char *fromLim, 1388 const char **fromP, const char *fromLim,
1294 char **toP, const char *toLim) 1389 char **toP, const char *toLim)
1295 { 1390 {
1296 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1391 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1297 char buf[XML_UTF8_ENCODE_MAX]; 1392 char buf[XML_UTF8_ENCODE_MAX];
1298 for (;;) { 1393 for (;;) {
1299 const char *utf8; 1394 const char *utf8;
1300 int n; 1395 int n;
1301 if (*fromP == fromLim) 1396 if (*fromP == fromLim)
1302 break; 1397 return XML_CONVERT_COMPLETED;
1303 utf8 = uenc->utf8[(unsigned char)**fromP]; 1398 utf8 = uenc->utf8[(unsigned char)**fromP];
1304 n = *utf8++; 1399 n = *utf8++;
1305 if (n == 0) { 1400 if (n == 0) {
1306 int c = uenc->convert(uenc->userData, *fromP); 1401 int c = uenc->convert(uenc->userData, *fromP);
1307 n = XmlUtf8Encode(c, buf); 1402 n = XmlUtf8Encode(c, buf);
1308 if (n > toLim - *toP) 1403 if (n > toLim - *toP)
1309 break; 1404 return XML_CONVERT_OUTPUT_EXHAUSTED;
1310 utf8 = buf; 1405 utf8 = buf;
1311 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 1406 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1312 - (BT_LEAD2 - 2)); 1407 - (BT_LEAD2 - 2));
1313 } 1408 }
1314 else { 1409 else {
1315 if (n > toLim - *toP) 1410 if (n > toLim - *toP)
1316 break; 1411 return XML_CONVERT_OUTPUT_EXHAUSTED;
1317 (*fromP)++; 1412 (*fromP)++;
1318 } 1413 }
1319 do { 1414 do {
1320 *(*toP)++ = *utf8++; 1415 *(*toP)++ = *utf8++;
1321 } while (--n != 0); 1416 } while (--n != 0);
1322 } 1417 }
1323 } 1418 }
1324 1419
1325 static void PTRCALL 1420 static enum XML_Convert_Result PTRCALL
1326 unknown_toUtf16(const ENCODING *enc, 1421 unknown_toUtf16(const ENCODING *enc,
1327 const char **fromP, const char *fromLim, 1422 const char **fromP, const char *fromLim,
1328 unsigned short **toP, const unsigned short *toLim) 1423 unsigned short **toP, const unsigned short *toLim)
1329 { 1424 {
1330 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1425 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1331 while (*fromP != fromLim && *toP != toLim) { 1426 while (*fromP < fromLim && *toP < toLim) {
1332 unsigned short c = uenc->utf16[(unsigned char)**fromP]; 1427 unsigned short c = uenc->utf16[(unsigned char)**fromP];
1333 if (c == 0) { 1428 if (c == 0) {
1334 c = (unsigned short) 1429 c = (unsigned short)
1335 uenc->convert(uenc->userData, *fromP); 1430 uenc->convert(uenc->userData, *fromP);
1336 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 1431 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1337 - (BT_LEAD2 - 2)); 1432 - (BT_LEAD2 - 2));
1338 } 1433 }
1339 else 1434 else
1340 (*fromP)++; 1435 (*fromP)++;
1341 *(*toP)++ = c; 1436 *(*toP)++ = c;
1342 } 1437 }
1438
1439 if ((*toP == toLim) && (*fromP < fromLim))
1440 return XML_CONVERT_OUTPUT_EXHAUSTED;
1441 else
1442 return XML_CONVERT_COMPLETED;
1343 } 1443 }
1344 1444
1345 ENCODING * 1445 ENCODING *
1346 XmlInitUnknownEncoding(void *mem, 1446 XmlInitUnknownEncoding(void *mem,
1347 int *table, 1447 int *table,
1348 CONVERTER convert, 1448 CONVERTER convert,
1349 void *userData) 1449 void *userData)
1350 { 1450 {
1351 int i; 1451 int i;
1352 struct unknown_encoding *e = (struct unknown_encoding *)mem; 1452 struct unknown_encoding *e = (struct unknown_encoding *)mem;
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after
1496 static int 1596 static int
1497 initScan(const ENCODING * const *encodingTable, 1597 initScan(const ENCODING * const *encodingTable,
1498 const INIT_ENCODING *enc, 1598 const INIT_ENCODING *enc,
1499 int state, 1599 int state,
1500 const char *ptr, 1600 const char *ptr,
1501 const char *end, 1601 const char *end,
1502 const char **nextTokPtr) 1602 const char **nextTokPtr)
1503 { 1603 {
1504 const ENCODING **encPtr; 1604 const ENCODING **encPtr;
1505 1605
1506 if (ptr == end) 1606 if (ptr >= end)
1507 return XML_TOK_NONE; 1607 return XML_TOK_NONE;
1508 encPtr = enc->encPtr; 1608 encPtr = enc->encPtr;
1509 if (ptr + 1 == end) { 1609 if (ptr + 1 == end) {
1510 /* only a single byte available for auto-detection */ 1610 /* only a single byte available for auto-detection */
1511 #ifndef XML_DTD /* FIXME */ 1611 #ifndef XML_DTD /* FIXME */
1512 /* a well-formed document entity must have more than one byte */ 1612 /* a well-formed document entity must have more than one byte */
1513 if (state != XML_CONTENT_STATE) 1613 if (state != XML_CONTENT_STATE)
1514 return XML_TOK_PARTIAL; 1614 return XML_TOK_PARTIAL;
1515 #endif 1615 #endif
1516 /* so we're parsing an external text entity... */ 1616 /* so we're parsing an external text entity... */
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
1642 CONVERTER convert, 1742 CONVERTER convert,
1643 void *userData) 1743 void *userData)
1644 { 1744 {
1645 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); 1745 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1646 if (enc) 1746 if (enc)
1647 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; 1747 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1648 return enc; 1748 return enc;
1649 } 1749 }
1650 1750
1651 #endif /* XML_NS */ 1751 #endif /* XML_NS */
OLDNEW
« no previous file with comments | « third_party/expat/files/lib/xmltok.c ('k') | third_party/expat/files/lib/xmltok_impl.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698