OLD | NEW |
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd | 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
2 See the file COPYING for copying permission. | 2 See the file COPYING for copying permission. |
3 */ | 3 */ |
4 | 4 |
5 #include <stddef.h> | 5 #include <stddef.h> |
6 | 6 |
7 #ifdef COMPILED_FROM_DSP | 7 #ifdef WIN32 |
8 #include "winconfig.h" | 8 #include "winconfig.h" |
9 #elif defined(MACOS_CLASSIC) | 9 #elif defined(MACOS_CLASSIC) |
10 #include "macconfig.h" | 10 #include "macconfig.h" |
11 #elif defined(__amigaos__) | 11 #elif defined(__amigaos__) |
12 #include "amigaconfig.h" | 12 #include "amigaconfig.h" |
13 #elif defined(__WATCOMC__) | 13 #elif defined(__WATCOMC__) |
14 #include "watcomconfig.h" | 14 #include "watcomconfig.h" |
15 #else | 15 #else |
16 #ifdef HAVE_EXPAT_CONFIG_H | 16 #ifdef HAVE_EXPAT_CONFIG_H |
17 #include <expat_config.h> | 17 #include <expat_config.h> |
18 #endif | 18 #endif |
19 #endif /* ndef COMPILED_FROM_DSP */ | 19 #endif /* ndef WIN32 */ |
20 | 20 |
21 #include "expat_external.h" | 21 #include "expat_external.h" |
22 #include "internal.h" | 22 #include "internal.h" |
23 #include "xmltok.h" | 23 #include "xmltok.h" |
24 #include "nametab.h" | 24 #include "nametab.h" |
25 | 25 |
26 #ifdef XML_DTD | 26 #ifdef XML_DTD |
27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) | 27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) |
28 #else | 28 #else |
29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */ | 29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */ |
30 #endif | 30 #endif |
31 | 31 |
32 #define VTABLE1 \ | 32 #define VTABLE1 \ |
33 { PREFIX(prologTok), PREFIX(contentTok), \ | 33 { PREFIX(prologTok), PREFIX(contentTok), \ |
34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ | 34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ |
35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ | 35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ |
36 PREFIX(sameName), \ | 36 PREFIX(sameName), \ |
37 PREFIX(nameMatchesAscii), \ | 37 PREFIX(nameMatchesAscii), \ |
38 PREFIX(nameLength), \ | 38 PREFIX(nameLength), \ |
39 PREFIX(skipS), \ | 39 PREFIX(skipS), \ |
40 PREFIX(getAtts), \ | 40 PREFIX(getAtts), \ |
41 PREFIX(charRefNumber), \ | 41 PREFIX(charRefNumber), \ |
42 PREFIX(predefinedEntityName), \ | 42 PREFIX(predefinedEntityName), \ |
43 PREFIX(updatePosition), \ | 43 PREFIX(updatePosition), \ |
44 PREFIX(isPublicId) | 44 PREFIX(isPublicId) |
45 | 45 |
46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) | 46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) |
47 | 47 |
48 #define UCS2_GET_NAMING(pages, hi, lo) \ | 48 #define UCS2_GET_NAMING(pages, hi, lo) \ |
49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) | 49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) |
50 | 50 |
51 /* A 2 byte UTF-8 representation splits the characters 11 bits between | 51 /* A 2 byte UTF-8 representation splits the characters 11 bits between |
52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into | 52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into |
53 pages, 3 bits to add to that index and 5 bits to generate the mask. | 53 pages, 3 bits to add to that index and 5 bits to generate the mask. |
54 */ | 54 */ |
55 #define UTF8_GET_NAMING2(pages, byte) \ | 55 #define UTF8_GET_NAMING2(pages, byte) \ |
56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ | 56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ |
57 + ((((byte)[0]) & 3) << 1) \ | 57 + ((((byte)[0]) & 3) << 1) \ |
58 + ((((byte)[1]) >> 5) & 1)] \ | 58 + ((((byte)[1]) >> 5) & 1)] \ |
59 & (1 << (((byte)[1]) & 0x1F))) | 59 & (1u << (((byte)[1]) & 0x1F))) |
60 | 60 |
61 /* A 3 byte UTF-8 representation splits the characters 16 bits between | 61 /* A 3 byte UTF-8 representation splits the characters 16 bits between |
62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index | 62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index |
63 into pages, 3 bits to add to that index and 5 bits to generate the | 63 into pages, 3 bits to add to that index and 5 bits to generate the |
64 mask. | 64 mask. |
65 */ | 65 */ |
66 #define UTF8_GET_NAMING3(pages, byte) \ | 66 #define UTF8_GET_NAMING3(pages, byte) \ |
67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ | 67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ |
68 + ((((byte)[1]) >> 2) & 0xF)] \ | 68 + ((((byte)[1]) >> 2) & 0xF)] \ |
69 << 3) \ | 69 << 3) \ |
70 + ((((byte)[1]) & 3) << 1) \ | 70 + ((((byte)[1]) & 3) << 1) \ |
71 + ((((byte)[2]) >> 5) & 1)] \ | 71 + ((((byte)[2]) >> 5) & 1)] \ |
72 & (1 << (((byte)[2]) & 0x1F))) | 72 & (1u << (((byte)[2]) & 0x1F))) |
73 | 73 |
74 #define UTF8_GET_NAMING(pages, p, n) \ | 74 #define UTF8_GET_NAMING(pages, p, n) \ |
75 ((n) == 2 \ | 75 ((n) == 2 \ |
76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ | 76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ |
77 : ((n) == 3 \ | 77 : ((n) == 3 \ |
78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ | 78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ |
79 : 0)) | 79 : 0)) |
80 | 80 |
81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B | 81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B |
82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ | 82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ |
(...skipping 32 matching lines...) |
115 || \ | 115 || \ |
116 ((*p) == 0xF0 \ | 116 ((*p) == 0xF0 \ |
117 ? \ | 117 ? \ |
118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ | 118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ |
119 : \ | 119 : \ |
120 ((p)[1] & 0x80) == 0 \ | 120 ((p)[1] & 0x80) == 0 \ |
121 || \ | 121 || \ |
122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) | 122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) |
123 | 123 |
124 static int PTRFASTCALL | 124 static int PTRFASTCALL |
125 isNever(const ENCODING *enc, const char *p) | 125 isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p)) |
126 { | 126 { |
127 return 0; | 127 return 0; |
128 } | 128 } |
129 | 129 |
130 static int PTRFASTCALL | 130 static int PTRFASTCALL |
131 utf8_isName2(const ENCODING *enc, const char *p) | 131 utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p) |
132 { | 132 { |
133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); | 133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); |
134 } | 134 } |
135 | 135 |
136 static int PTRFASTCALL | 136 static int PTRFASTCALL |
137 utf8_isName3(const ENCODING *enc, const char *p) | 137 utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p) |
138 { | 138 { |
139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); | 139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); |
140 } | 140 } |
141 | 141 |
142 #define utf8_isName4 isNever | 142 #define utf8_isName4 isNever |
143 | 143 |
144 static int PTRFASTCALL | 144 static int PTRFASTCALL |
145 utf8_isNmstrt2(const ENCODING *enc, const char *p) | 145 utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p) |
146 { | 146 { |
147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); | 147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); |
148 } | 148 } |
149 | 149 |
150 static int PTRFASTCALL | 150 static int PTRFASTCALL |
151 utf8_isNmstrt3(const ENCODING *enc, const char *p) | 151 utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p) |
152 { | 152 { |
153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); | 153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); |
154 } | 154 } |
155 | 155 |
156 #define utf8_isNmstrt4 isNever | 156 #define utf8_isNmstrt4 isNever |
157 | 157 |
158 static int PTRFASTCALL | 158 static int PTRFASTCALL |
159 utf8_isInvalid2(const ENCODING *enc, const char *p) | 159 utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p) |
160 { | 160 { |
161 return UTF8_INVALID2((const unsigned char *)p); | 161 return UTF8_INVALID2((const unsigned char *)p); |
162 } | 162 } |
163 | 163 |
164 static int PTRFASTCALL | 164 static int PTRFASTCALL |
165 utf8_isInvalid3(const ENCODING *enc, const char *p) | 165 utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p) |
166 { | 166 { |
167 return UTF8_INVALID3((const unsigned char *)p); | 167 return UTF8_INVALID3((const unsigned char *)p); |
168 } | 168 } |
169 | 169 |
170 static int PTRFASTCALL | 170 static int PTRFASTCALL |
171 utf8_isInvalid4(const ENCODING *enc, const char *p) | 171 utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p) |
172 { | 172 { |
173 return UTF8_INVALID4((const unsigned char *)p); | 173 return UTF8_INVALID4((const unsigned char *)p); |
174 } | 174 } |
175 | 175 |
176 struct normal_encoding { | 176 struct normal_encoding { |
177 ENCODING enc; | 177 ENCODING enc; |
178 unsigned char type[256]; | 178 unsigned char type[256]; |
179 #ifdef XML_MIN_SIZE | 179 #ifdef XML_MIN_SIZE |
180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *); | 180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *); |
181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); | 181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); |
(...skipping 33 matching lines...) |
215 E ## isName2, \ | 215 E ## isName2, \ |
216 E ## isName3, \ | 216 E ## isName3, \ |
217 E ## isName4, \ | 217 E ## isName4, \ |
218 E ## isNmstrt2, \ | 218 E ## isNmstrt2, \ |
219 E ## isNmstrt3, \ | 219 E ## isNmstrt3, \ |
220 E ## isNmstrt4, \ | 220 E ## isNmstrt4, \ |
221 E ## isInvalid2, \ | 221 E ## isInvalid2, \ |
222 E ## isInvalid3, \ | 222 E ## isInvalid3, \ |
223 E ## isInvalid4 | 223 E ## isInvalid4 |
224 | 224 |
| 225 #define NULL_VTABLE \ |
| 226 /* isName2 */ NULL, \ |
| 227 /* isName3 */ NULL, \ |
| 228 /* isName4 */ NULL, \ |
| 229 /* isNmstrt2 */ NULL, \ |
| 230 /* isNmstrt3 */ NULL, \ |
| 231 /* isNmstrt4 */ NULL, \ |
| 232 /* isInvalid2 */ NULL, \ |
| 233 /* isInvalid3 */ NULL, \ |
| 234 /* isInvalid4 */ NULL |
| 235 |
225 static int FASTCALL checkCharRefNumber(int); | 236 static int FASTCALL checkCharRefNumber(int); |
226 | 237 |
227 #include "xmltok_impl.h" | 238 #include "xmltok_impl.h" |
228 #include "ascii.h" | 239 #include "ascii.h" |
229 | 240 |
230 #ifdef XML_MIN_SIZE | 241 #ifdef XML_MIN_SIZE |
231 #define sb_isNameMin isNever | 242 #define sb_isNameMin isNever |
232 #define sb_isNmstrtMin isNever | 243 #define sb_isNmstrtMin isNever |
233 #endif | 244 #endif |
234 | 245 |
(...skipping 76 matching lines...) |
311 #undef IS_NMSTRT_CHAR_MINBPC | 322 #undef IS_NMSTRT_CHAR_MINBPC |
312 #undef IS_INVALID_CHAR | 323 #undef IS_INVALID_CHAR |
313 | 324 |
314 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ | 325 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ |
315 UTF8_cval1 = 0x00, | 326 UTF8_cval1 = 0x00, |
316 UTF8_cval2 = 0xc0, | 327 UTF8_cval2 = 0xc0, |
317 UTF8_cval3 = 0xe0, | 328 UTF8_cval3 = 0xe0, |
318 UTF8_cval4 = 0xf0 | 329 UTF8_cval4 = 0xf0 |
319 }; | 330 }; |
320 | 331 |
321 static void PTRCALL | 332 void |
322 utf8_toUtf8(const ENCODING *enc, | 333 align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef) |
| 334 { |
| 335 const char * fromLim = *fromLimRef; |
| 336 size_t walked = 0; |
| 337 for (; fromLim > from; fromLim--, walked++) { |
| 338 const unsigned char prev = (unsigned char)fromLim[-1]; |
| 339 if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ |
| 340 if (walked + 1 >= 4) { |
| 341 fromLim += 4 - 1; |
| 342 break; |
| 343 } else { |
| 344 walked = 0; |
| 345 } |
| 346 } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ |
| 347 if (walked + 1 >= 3) { |
| 348 fromLim += 3 - 1; |
| 349 break; |
| 350 } else { |
| 351 walked = 0; |
| 352 } |
| 353 } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ |
| 354 if (walked + 1 >= 2) { |
| 355 fromLim += 2 - 1; |
| 356 break; |
| 357 } else { |
| 358 walked = 0; |
| 359 } |
| 360 } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ |
| 361 break; |
| 362 } |
| 363 } |
| 364 *fromLimRef = fromLim; |
| 365 } |
| 366 |
| 367 static enum XML_Convert_Result PTRCALL |
| 368 utf8_toUtf8(const ENCODING *UNUSED_P(enc), |
323 const char **fromP, const char *fromLim, | 369 const char **fromP, const char *fromLim, |
324 char **toP, const char *toLim) | 370 char **toP, const char *toLim) |
325 { | 371 { |
| 372 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; |
326 char *to; | 373 char *to; |
327 const char *from; | 374 const char *from; |
328 if (fromLim - *fromP > toLim - *toP) { | 375 if (fromLim - *fromP > toLim - *toP) { |
329 /* Avoid copying partial characters. */ | 376 /* Avoid copying partial characters. */ |
330 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) | 377 res = XML_CONVERT_OUTPUT_EXHAUSTED; |
331 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) | 378 fromLim = *fromP + (toLim - *toP); |
332 break; | 379 align_limit_to_full_utf8_characters(*fromP, &fromLim); |
333 } | 380 } |
334 for (to = *toP, from = *fromP; from != fromLim; from++, to++) | 381 for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) |
335 *to = *from; | 382 *to = *from; |
336 *fromP = from; | 383 *fromP = from; |
337 *toP = to; | 384 *toP = to; |
| 385 |
| 386 if ((to == toLim) && (from < fromLim)) |
| 387 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 388 else |
| 389 return res; |
338 } | 390 } |
339 | 391 |
340 static void PTRCALL | 392 static enum XML_Convert_Result PTRCALL |
341 utf8_toUtf16(const ENCODING *enc, | 393 utf8_toUtf16(const ENCODING *enc, |
342 const char **fromP, const char *fromLim, | 394 const char **fromP, const char *fromLim, |
343 unsigned short **toP, const unsigned short *toLim) | 395 unsigned short **toP, const unsigned short *toLim) |
344 { | 396 { |
| 397 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; |
345 unsigned short *to = *toP; | 398 unsigned short *to = *toP; |
346 const char *from = *fromP; | 399 const char *from = *fromP; |
347 while (from != fromLim && to != toLim) { | 400 while (from < fromLim && to < toLim) { |
348 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { | 401 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { |
349 case BT_LEAD2: | 402 case BT_LEAD2: |
| 403 if (fromLim - from < 2) { |
| 404 res = XML_CONVERT_INPUT_INCOMPLETE; |
| 405 break; |
| 406 } |
350 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); | 407 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); |
351 from += 2; | 408 from += 2; |
352 break; | 409 break; |
353 case BT_LEAD3: | 410 case BT_LEAD3: |
| 411 if (fromLim - from < 3) { |
| 412 res = XML_CONVERT_INPUT_INCOMPLETE; |
| 413 break; |
| 414 } |
354 *to++ = (unsigned short)(((from[0] & 0xf) << 12) | 415 *to++ = (unsigned short)(((from[0] & 0xf) << 12) |
355 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); | 416 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); |
356 from += 3; | 417 from += 3; |
357 break; | 418 break; |
358 case BT_LEAD4: | 419 case BT_LEAD4: |
359 { | 420 { |
360 unsigned long n; | 421 unsigned long n; |
361 if (to + 1 == toLim) | 422 if (toLim - to < 2) { |
| 423 res = XML_CONVERT_OUTPUT_EXHAUSTED; |
362 goto after; | 424 goto after; |
| 425 } |
| 426 if (fromLim - from < 4) { |
| 427 res = XML_CONVERT_INPUT_INCOMPLETE; |
| 428 goto after; |
| 429 } |
363 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | 430 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) |
364 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); | 431 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); |
365 n -= 0x10000; | 432 n -= 0x10000; |
366 to[0] = (unsigned short)((n >> 10) | 0xD800); | 433 to[0] = (unsigned short)((n >> 10) | 0xD800); |
367 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); | 434 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); |
368 to += 2; | 435 to += 2; |
369 from += 4; | 436 from += 4; |
370 } | 437 } |
371 break; | 438 break; |
372 default: | 439 default: |
373 *to++ = *from++; | 440 *to++ = *from++; |
374 break; | 441 break; |
375 } | 442 } |
376 } | 443 } |
377 after: | 444 after: |
378 *fromP = from; | 445 *fromP = from; |
379 *toP = to; | 446 *toP = to; |
| 447 return res; |
380 } | 448 } |
381 | 449 |
382 #ifdef XML_NS | 450 #ifdef XML_NS |
383 static const struct normal_encoding utf8_encoding_ns = { | 451 static const struct normal_encoding utf8_encoding_ns = { |
384 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, | 452 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, |
385 { | 453 { |
386 #include "asciitab.h" | 454 #include "asciitab.h" |
387 #include "utf8tab.h" | 455 #include "utf8tab.h" |
388 }, | 456 }, |
389 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) | 457 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) |
(...skipping 28 matching lines...) |
418 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, | 486 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, |
419 { | 487 { |
420 #define BT_COLON BT_NMSTRT | 488 #define BT_COLON BT_NMSTRT |
421 #include "iasciitab.h" | 489 #include "iasciitab.h" |
422 #undef BT_COLON | 490 #undef BT_COLON |
423 #include "utf8tab.h" | 491 #include "utf8tab.h" |
424 }, | 492 }, |
425 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) | 493 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) |
426 }; | 494 }; |
427 | 495 |
428 static void PTRCALL | 496 static enum XML_Convert_Result PTRCALL |
429 latin1_toUtf8(const ENCODING *enc, | 497 latin1_toUtf8(const ENCODING *UNUSED_P(enc), |
430 const char **fromP, const char *fromLim, | 498 const char **fromP, const char *fromLim, |
431 char **toP, const char *toLim) | 499 char **toP, const char *toLim) |
432 { | 500 { |
433 for (;;) { | 501 for (;;) { |
434 unsigned char c; | 502 unsigned char c; |
435 if (*fromP == fromLim) | 503 if (*fromP == fromLim) |
436 break; | 504 return XML_CONVERT_COMPLETED; |
437 c = (unsigned char)**fromP; | 505 c = (unsigned char)**fromP; |
438 if (c & 0x80) { | 506 if (c & 0x80) { |
439 if (toLim - *toP < 2) | 507 if (toLim - *toP < 2) |
440 break; | 508 return XML_CONVERT_OUTPUT_EXHAUSTED; |
441 *(*toP)++ = (char)((c >> 6) | UTF8_cval2); | 509 *(*toP)++ = (char)((c >> 6) | UTF8_cval2); |
442 *(*toP)++ = (char)((c & 0x3f) | 0x80); | 510 *(*toP)++ = (char)((c & 0x3f) | 0x80); |
443 (*fromP)++; | 511 (*fromP)++; |
444 } | 512 } |
445 else { | 513 else { |
446 if (*toP == toLim) | 514 if (*toP == toLim) |
447 break; | 515 return XML_CONVERT_OUTPUT_EXHAUSTED; |
448 *(*toP)++ = *(*fromP)++; | 516 *(*toP)++ = *(*fromP)++; |
449 } | 517 } |
450 } | 518 } |
451 } | 519 } |
452 | 520 |
453 static void PTRCALL | 521 static enum XML_Convert_Result PTRCALL |
454 latin1_toUtf16(const ENCODING *enc, | 522 latin1_toUtf16(const ENCODING *UNUSED_P(enc), |
455 const char **fromP, const char *fromLim, | 523 const char **fromP, const char *fromLim, |
456 unsigned short **toP, const unsigned short *toLim) | 524 unsigned short **toP, const unsigned short *toLim) |
457 { | 525 { |
458 while (*fromP != fromLim && *toP != toLim) | 526 while (*fromP < fromLim && *toP < toLim) |
459 *(*toP)++ = (unsigned char)*(*fromP)++; | 527 *(*toP)++ = (unsigned char)*(*fromP)++; |
| 528 |
| 529 if ((*toP == toLim) && (*fromP < fromLim)) |
| 530 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 531 else |
| 532 return XML_CONVERT_COMPLETED; |
460 } | 533 } |
461 | 534 |
462 #ifdef XML_NS | 535 #ifdef XML_NS |
463 | 536 |
464 static const struct normal_encoding latin1_encoding_ns = { | 537 static const struct normal_encoding latin1_encoding_ns = { |
465 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, | 538 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, |
466 { | 539 { |
467 #include "asciitab.h" | 540 #include "asciitab.h" |
468 #include "latin1tab.h" | 541 #include "latin1tab.h" |
469 }, | 542 }, |
470 STANDARD_VTABLE(sb_) | 543 STANDARD_VTABLE(sb_) NULL_VTABLE |
471 }; | 544 }; |
472 | 545 |
473 #endif | 546 #endif |
474 | 547 |
475 static const struct normal_encoding latin1_encoding = { | 548 static const struct normal_encoding latin1_encoding = { |
476 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, | 549 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, |
477 { | 550 { |
478 #define BT_COLON BT_NMSTRT | 551 #define BT_COLON BT_NMSTRT |
479 #include "asciitab.h" | 552 #include "asciitab.h" |
480 #undef BT_COLON | 553 #undef BT_COLON |
481 #include "latin1tab.h" | 554 #include "latin1tab.h" |
482 }, | 555 }, |
483 STANDARD_VTABLE(sb_) | 556 STANDARD_VTABLE(sb_) NULL_VTABLE |
484 }; | 557 }; |
485 | 558 |
486 static void PTRCALL | 559 static enum XML_Convert_Result PTRCALL |
487 ascii_toUtf8(const ENCODING *enc, | 560 ascii_toUtf8(const ENCODING *UNUSED_P(enc), |
488 const char **fromP, const char *fromLim, | 561 const char **fromP, const char *fromLim, |
489 char **toP, const char *toLim) | 562 char **toP, const char *toLim) |
490 { | 563 { |
491 while (*fromP != fromLim && *toP != toLim) | 564 while (*fromP < fromLim && *toP < toLim) |
492 *(*toP)++ = *(*fromP)++; | 565 *(*toP)++ = *(*fromP)++; |
| 566 |
| 567 if ((*toP == toLim) && (*fromP < fromLim)) |
| 568 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 569 else |
| 570 return XML_CONVERT_COMPLETED; |
493 } | 571 } |
494 | 572 |
495 #ifdef XML_NS | 573 #ifdef XML_NS |
496 | 574 |
497 static const struct normal_encoding ascii_encoding_ns = { | 575 static const struct normal_encoding ascii_encoding_ns = { |
498 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, | 576 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, |
499 { | 577 { |
500 #include "asciitab.h" | 578 #include "asciitab.h" |
501 /* BT_NONXML == 0 */ | 579 /* BT_NONXML == 0 */ |
502 }, | 580 }, |
503 STANDARD_VTABLE(sb_) | 581 STANDARD_VTABLE(sb_) NULL_VTABLE |
504 }; | 582 }; |
505 | 583 |
506 #endif | 584 #endif |
507 | 585 |
508 static const struct normal_encoding ascii_encoding = { | 586 static const struct normal_encoding ascii_encoding = { |
509 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, | 587 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, |
510 { | 588 { |
511 #define BT_COLON BT_NMSTRT | 589 #define BT_COLON BT_NMSTRT |
512 #include "asciitab.h" | 590 #include "asciitab.h" |
513 #undef BT_COLON | 591 #undef BT_COLON |
514 /* BT_NONXML == 0 */ | 592 /* BT_NONXML == 0 */ |
515 }, | 593 }, |
516 STANDARD_VTABLE(sb_) | 594 STANDARD_VTABLE(sb_) NULL_VTABLE |
517 }; | 595 }; |
518 | 596 |
519 static int PTRFASTCALL | 597 static int PTRFASTCALL |
520 unicode_byte_type(char hi, char lo) | 598 unicode_byte_type(char hi, char lo) |
521 { | 599 { |
522 switch ((unsigned char)hi) { | 600 switch ((unsigned char)hi) { |
523 case 0xD8: case 0xD9: case 0xDA: case 0xDB: | 601 case 0xD8: case 0xD9: case 0xDA: case 0xDB: |
524 return BT_LEAD4; | 602 return BT_LEAD4; |
525 case 0xDC: case 0xDD: case 0xDE: case 0xDF: | 603 case 0xDC: case 0xDD: case 0xDE: case 0xDF: |
526 return BT_TRAIL; | 604 return BT_TRAIL; |
527 case 0xFF: | 605 case 0xFF: |
528 switch ((unsigned char)lo) { | 606 switch ((unsigned char)lo) { |
529 case 0xFF: | 607 case 0xFF: |
530 case 0xFE: | 608 case 0xFE: |
531 return BT_NONXML; | 609 return BT_NONXML; |
532 } | 610 } |
533 break; | 611 break; |
534 } | 612 } |
535 return BT_NONASCII; | 613 return BT_NONASCII; |
536 } | 614 } |
537 | 615 |
538 #define DEFINE_UTF16_TO_UTF8(E) \ | 616 #define DEFINE_UTF16_TO_UTF8(E) \ |
539 static void PTRCALL \ | 617 static enum XML_Convert_Result PTRCALL \ |
540 E ## toUtf8(const ENCODING *enc, \ | 618 E ## toUtf8(const ENCODING *UNUSED_P(enc), \ |
541 const char **fromP, const char *fromLim, \ | 619 const char **fromP, const char *fromLim, \ |
542 char **toP, const char *toLim) \ | 620 char **toP, const char *toLim) \ |
543 { \ | 621 { \ |
544 const char *from; \ | 622 const char *from = *fromP; \ |
545 for (from = *fromP; from != fromLim; from += 2) { \ | 623 fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ |
| 624 for (; from < fromLim; from += 2) { \ |
546 int plane; \ | 625 int plane; \ |
547 unsigned char lo2; \ | 626 unsigned char lo2; \ |
548 unsigned char lo = GET_LO(from); \ | 627 unsigned char lo = GET_LO(from); \ |
549 unsigned char hi = GET_HI(from); \ | 628 unsigned char hi = GET_HI(from); \ |
550 switch (hi) { \ | 629 switch (hi) { \ |
551 case 0: \ | 630 case 0: \ |
552 if (lo < 0x80) { \ | 631 if (lo < 0x80) { \ |
553 if (*toP == toLim) { \ | 632 if (*toP == toLim) { \ |
554 *fromP = from; \ | 633 *fromP = from; \ |
555 return; \ | 634 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
556 } \ | 635 } \ |
557 *(*toP)++ = lo; \ | 636 *(*toP)++ = lo; \ |
558 break; \ | 637 break; \ |
559 } \ | 638 } \ |
560 /* fall through */ \ | 639 /* fall through */ \ |
561 case 0x1: case 0x2: case 0x3: \ | 640 case 0x1: case 0x2: case 0x3: \ |
562 case 0x4: case 0x5: case 0x6: case 0x7: \ | 641 case 0x4: case 0x5: case 0x6: case 0x7: \ |
563 if (toLim - *toP < 2) { \ | 642 if (toLim - *toP < 2) { \ |
564 *fromP = from; \ | 643 *fromP = from; \ |
565 return; \ | 644 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
566 } \ | 645 } \ |
567 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ | 646 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ |
568 *(*toP)++ = ((lo & 0x3f) | 0x80); \ | 647 *(*toP)++ = ((lo & 0x3f) | 0x80); \ |
569 break; \ | 648 break; \ |
570 default: \ | 649 default: \ |
571 if (toLim - *toP < 3) { \ | 650 if (toLim - *toP < 3) { \ |
572 *fromP = from; \ | 651 *fromP = from; \ |
573 return; \ | 652 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
574 } \ | 653 } \ |
575 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ | 654 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ |
576 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ | 655 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ |
577 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ | 656 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ |
578 *(*toP)++ = ((lo & 0x3f) | 0x80); \ | 657 *(*toP)++ = ((lo & 0x3f) | 0x80); \ |
579 break; \ | 658 break; \ |
580 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ | 659 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ |
581 if (toLim - *toP < 4) { \ | 660 if (toLim - *toP < 4) { \ |
582 *fromP = from; \ | 661 *fromP = from; \ |
583 return; \ | 662 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
| 663 } \ |
| 664 if (fromLim - from < 4) { \ |
| 665 *fromP = from; \ |
| 666 return XML_CONVERT_INPUT_INCOMPLETE; \ |
584 } \ | 667 } \ |
585 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ | 668 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ |
586 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ | 669 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ |
587 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ | 670 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ |
588 from += 2; \ | 671 from += 2; \ |
589 lo2 = GET_LO(from); \ | 672 lo2 = GET_LO(from); \ |
590 *(*toP)++ = (((lo & 0x3) << 4) \ | 673 *(*toP)++ = (((lo & 0x3) << 4) \ |
591 | ((GET_HI(from) & 0x3) << 2) \ | 674 | ((GET_HI(from) & 0x3) << 2) \ |
592 | (lo2 >> 6) \ | 675 | (lo2 >> 6) \ |
593 | 0x80); \ | 676 | 0x80); \ |
594 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ | 677 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ |
595 break; \ | 678 break; \ |
596 } \ | 679 } \ |
597 } \ | 680 } \ |
598 *fromP = from; \ | 681 *fromP = from; \ |
| 682 if (from < fromLim) \ |
| 683 return XML_CONVERT_INPUT_INCOMPLETE; \ |
| 684 else \ |
| 685 return XML_CONVERT_COMPLETED; \ |
599 } | 686 } |
600 | 687 |
601 #define DEFINE_UTF16_TO_UTF16(E) \ | 688 #define DEFINE_UTF16_TO_UTF16(E) \ |
602 static void PTRCALL \ | 689 static enum XML_Convert_Result PTRCALL \ |
603 E ## toUtf16(const ENCODING *enc, \ | 690 E ## toUtf16(const ENCODING *UNUSED_P(enc), \ |
604 const char **fromP, const char *fromLim, \ | 691 const char **fromP, const char *fromLim, \ |
605 unsigned short **toP, const unsigned short *toLim) \ | 692 unsigned short **toP, const unsigned short *toLim) \ |
606 { \ | 693 { \ |
| 694 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ |
| 695 fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ |
607 /* Avoid copying first half only of surrogate */ \ | 696 /* Avoid copying first half only of surrogate */ \ |
608 if (fromLim - *fromP > ((toLim - *toP) << 1) \ | 697 if (fromLim - *fromP > ((toLim - *toP) << 1) \ |
609 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ | 698 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ |
610 fromLim -= 2; \ | 699 fromLim -= 2; \ |
611 for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ | 700 res = XML_CONVERT_INPUT_INCOMPLETE; \ |
| 701 } \ |
| 702 for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ |
612 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ | 703 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ |
| 704 if ((*toP == toLim) && (*fromP < fromLim)) \ |
| 705 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
| 706 else \ |
| 707 return res; \ |
613 } | 708 } |
614 | 709 |
615 #define SET2(ptr, ch) \ | 710 #define SET2(ptr, ch) \ |
616 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) | 711 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) |
617 #define GET_LO(ptr) ((unsigned char)(ptr)[0]) | 712 #define GET_LO(ptr) ((unsigned char)(ptr)[0]) |
618 #define GET_HI(ptr) ((unsigned char)(ptr)[1]) | 713 #define GET_HI(ptr) ((unsigned char)(ptr)[1]) |
619 | 714 |
620 DEFINE_UTF16_TO_UTF8(little2_) | 715 DEFINE_UTF16_TO_UTF8(little2_) |
621 DEFINE_UTF16_TO_UTF16(little2_) | 716 DEFINE_UTF16_TO_UTF16(little2_) |
622 | 717 |
(...skipping 96 matching lines...) |
719 #if BYTEORDER == 1234 | 814 #if BYTEORDER == 1234 |
720 1 | 815 1 |
721 #else | 816 #else |
722 0 | 817 0 |
723 #endif | 818 #endif |
724 }, | 819 }, |
725 { | 820 { |
726 #include "asciitab.h" | 821 #include "asciitab.h" |
727 #include "latin1tab.h" | 822 #include "latin1tab.h" |
728 }, | 823 }, |
729 STANDARD_VTABLE(little2_) | 824 STANDARD_VTABLE(little2_) NULL_VTABLE |
730 }; | 825 }; |
731 | 826 |
732 #endif | 827 #endif |
733 | 828 |
734 static const struct normal_encoding little2_encoding = { | 829 static const struct normal_encoding little2_encoding = { |
735 { VTABLE, 2, 0, | 830 { VTABLE, 2, 0, |
736 #if BYTEORDER == 1234 | 831 #if BYTEORDER == 1234 |
737 1 | 832 1 |
738 #else | 833 #else |
739 0 | 834 0 |
740 #endif | 835 #endif |
741 }, | 836 }, |
742 { | 837 { |
743 #define BT_COLON BT_NMSTRT | 838 #define BT_COLON BT_NMSTRT |
744 #include "asciitab.h" | 839 #include "asciitab.h" |
745 #undef BT_COLON | 840 #undef BT_COLON |
746 #include "latin1tab.h" | 841 #include "latin1tab.h" |
747 }, | 842 }, |
748 STANDARD_VTABLE(little2_) | 843 STANDARD_VTABLE(little2_) NULL_VTABLE |
749 }; | 844 }; |
750 | 845 |
/* The internal little2_ encodings are compiled for every BYTEORDER except
   4321. */
#if BYTEORDER != 4321

#ifdef XML_NS

/* XML_NS-only variant: iasciitab.h is included without the BT_COLON
   override, so BT_COLON entries keep their own byte type. */
static const struct normal_encoding internal_little2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_) NULL_VTABLE
};

#endif

/* Variant in which BT_COLON entries of iasciitab.h expand to BT_NMSTRT. */
static const struct normal_encoding internal_little2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_) NULL_VTABLE
};

#endif
778 | 873 |
779 | 874 |
/* Byte type of the two-byte unit at p: when the first byte is 0 the second
   byte indexes the encoding's type[] table; otherwise both bytes are passed
   to unicode_byte_type(). */
#define BIG2_BYTE_TYPE(enc, p) \
  ((p)[0] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))
/* Yields the second byte when the first byte is 0, and -1 otherwise. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
860 #if BYTEORDER == 4321 | 955 #if BYTEORDER == 4321 |
861 1 | 956 1 |
862 #else | 957 #else |
863 0 | 958 0 |
864 #endif | 959 #endif |
865 }, | 960 }, |
866 { | 961 { |
867 #include "asciitab.h" | 962 #include "asciitab.h" |
868 #include "latin1tab.h" | 963 #include "latin1tab.h" |
869 }, | 964 }, |
870 STANDARD_VTABLE(big2_) | 965 STANDARD_VTABLE(big2_) NULL_VTABLE |
871 }; | 966 }; |
872 | 967 |
873 #endif | 968 #endif |
874 | 969 |
/* Encoding descriptor wired to the big2_ tokenizer functions
   (see STANDARD_VTABLE(big2_) below).  This variant includes asciitab.h
   with BT_COLON temporarily aliased to BT_NMSTRT. */
static const struct normal_encoding big2_encoding = {
  { VTABLE, 2, 0,
  /* The last header field is 1 only when BYTEORDER is 4321, otherwise 0.
     NOTE(review): presumably this flags "matches native UTF-16 byte order";
     confirm against the ENCODING declaration. */
#if BYTEORDER == 4321
    1
#else
    0
#endif
  },
  {
  /* While asciitab.h is being included, every BT_COLON entry in it expands
     to BT_NMSTRT; the alias is removed again before latin1tab.h. */
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_) NULL_VTABLE
};
891 | 986 |
/* The internal big2_ encodings are compiled for every BYTEORDER except
   1234. */
#if BYTEORDER != 1234

#ifdef XML_NS

/* XML_NS-only variant: iasciitab.h is included without the BT_COLON
   override, so BT_COLON entries keep their own byte type. */
static const struct normal_encoding internal_big2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_) NULL_VTABLE
};

#endif

/* Variant in which BT_COLON entries of iasciitab.h expand to BT_NMSTRT. */
static const struct normal_encoding internal_big2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_) NULL_VTABLE
};

#endif
919 | 1014 |
920 #undef PREFIX | 1015 #undef PREFIX |
921 | 1016 |
922 static int FASTCALL | 1017 static int FASTCALL |
923 streqci(const char *s1, const char *s2) | 1018 streqci(const char *s1, const char *s2) |
924 { | 1019 { |
925 for (;;) { | 1020 for (;;) { |
926 char c1 = *s1++; | 1021 char c1 = *s1++; |
927 char c2 = *s2++; | 1022 char c2 = *s2++; |
928 if (ASCII_a <= c1 && c1 <= ASCII_z) | 1023 if (ASCII_a <= c1 && c1 <= ASCII_z) |
929 c1 += ASCII_A - ASCII_a; | 1024 c1 += ASCII_A - ASCII_a; |
930 if (ASCII_a <= c2 && c2 <= ASCII_z) | 1025 if (ASCII_a <= c2 && c2 <= ASCII_z) |
931 c2 += ASCII_A - ASCII_a; | 1026 c2 += ASCII_A - ASCII_a; |
932 if (c1 != c2) | 1027 if (c1 != c2) |
933 return 0; | 1028 return 0; |
934 if (!c1) | 1029 if (!c1) |
935 break; | 1030 break; |
936 } | 1031 } |
937 return 1; | 1032 return 1; |
938 } | 1033 } |
939 | 1034 |
940 static void PTRCALL | 1035 static void PTRCALL |
941 initUpdatePosition(const ENCODING *enc, const char *ptr, | 1036 initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr, |
942 const char *end, POSITION *pos) | 1037 const char *end, POSITION *pos) |
943 { | 1038 { |
944 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); | 1039 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); |
945 } | 1040 } |
946 | 1041 |
947 static int | 1042 static int |
948 toAscii(const ENCODING *enc, const char *ptr, const char *end) | 1043 toAscii(const ENCODING *enc, const char *ptr, const char *end) |
949 { | 1044 { |
950 char buf[1]; | 1045 char buf[1]; |
951 char *p = buf; | 1046 char *p = buf; |
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1281 } | 1376 } |
1282 | 1377 |
1283 static int PTRFASTCALL | 1378 static int PTRFASTCALL |
1284 unknown_isInvalid(const ENCODING *enc, const char *p) | 1379 unknown_isInvalid(const ENCODING *enc, const char *p) |
1285 { | 1380 { |
1286 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 1381 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
1287 int c = uenc->convert(uenc->userData, p); | 1382 int c = uenc->convert(uenc->userData, p); |
1288 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; | 1383 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; |
1289 } | 1384 } |
1290 | 1385 |
1291 static void PTRCALL | 1386 static enum XML_Convert_Result PTRCALL |
1292 unknown_toUtf8(const ENCODING *enc, | 1387 unknown_toUtf8(const ENCODING *enc, |
1293 const char **fromP, const char *fromLim, | 1388 const char **fromP, const char *fromLim, |
1294 char **toP, const char *toLim) | 1389 char **toP, const char *toLim) |
1295 { | 1390 { |
1296 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 1391 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
1297 char buf[XML_UTF8_ENCODE_MAX]; | 1392 char buf[XML_UTF8_ENCODE_MAX]; |
1298 for (;;) { | 1393 for (;;) { |
1299 const char *utf8; | 1394 const char *utf8; |
1300 int n; | 1395 int n; |
1301 if (*fromP == fromLim) | 1396 if (*fromP == fromLim) |
1302 break; | 1397 return XML_CONVERT_COMPLETED; |
1303 utf8 = uenc->utf8[(unsigned char)**fromP]; | 1398 utf8 = uenc->utf8[(unsigned char)**fromP]; |
1304 n = *utf8++; | 1399 n = *utf8++; |
1305 if (n == 0) { | 1400 if (n == 0) { |
1306 int c = uenc->convert(uenc->userData, *fromP); | 1401 int c = uenc->convert(uenc->userData, *fromP); |
1307 n = XmlUtf8Encode(c, buf); | 1402 n = XmlUtf8Encode(c, buf); |
1308 if (n > toLim - *toP) | 1403 if (n > toLim - *toP) |
1309 break; | 1404 return XML_CONVERT_OUTPUT_EXHAUSTED; |
1310 utf8 = buf; | 1405 utf8 = buf; |
1311 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] | 1406 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] |
1312 - (BT_LEAD2 - 2)); | 1407 - (BT_LEAD2 - 2)); |
1313 } | 1408 } |
1314 else { | 1409 else { |
1315 if (n > toLim - *toP) | 1410 if (n > toLim - *toP) |
1316 break; | 1411 return XML_CONVERT_OUTPUT_EXHAUSTED; |
1317 (*fromP)++; | 1412 (*fromP)++; |
1318 } | 1413 } |
1319 do { | 1414 do { |
1320 *(*toP)++ = *utf8++; | 1415 *(*toP)++ = *utf8++; |
1321 } while (--n != 0); | 1416 } while (--n != 0); |
1322 } | 1417 } |
1323 } | 1418 } |
1324 | 1419 |
1325 static void PTRCALL | 1420 static enum XML_Convert_Result PTRCALL |
1326 unknown_toUtf16(const ENCODING *enc, | 1421 unknown_toUtf16(const ENCODING *enc, |
1327 const char **fromP, const char *fromLim, | 1422 const char **fromP, const char *fromLim, |
1328 unsigned short **toP, const unsigned short *toLim) | 1423 unsigned short **toP, const unsigned short *toLim) |
1329 { | 1424 { |
1330 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 1425 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
1331 while (*fromP != fromLim && *toP != toLim) { | 1426 while (*fromP < fromLim && *toP < toLim) { |
1332 unsigned short c = uenc->utf16[(unsigned char)**fromP]; | 1427 unsigned short c = uenc->utf16[(unsigned char)**fromP]; |
1333 if (c == 0) { | 1428 if (c == 0) { |
1334 c = (unsigned short) | 1429 c = (unsigned short) |
1335 uenc->convert(uenc->userData, *fromP); | 1430 uenc->convert(uenc->userData, *fromP); |
1336 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] | 1431 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] |
1337 - (BT_LEAD2 - 2)); | 1432 - (BT_LEAD2 - 2)); |
1338 } | 1433 } |
1339 else | 1434 else |
1340 (*fromP)++; | 1435 (*fromP)++; |
1341 *(*toP)++ = c; | 1436 *(*toP)++ = c; |
1342 } | 1437 } |
| 1438 |
| 1439 if ((*toP == toLim) && (*fromP < fromLim)) |
| 1440 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 1441 else |
| 1442 return XML_CONVERT_COMPLETED; |
1343 } | 1443 } |
1344 | 1444 |
1345 ENCODING * | 1445 ENCODING * |
1346 XmlInitUnknownEncoding(void *mem, | 1446 XmlInitUnknownEncoding(void *mem, |
1347 int *table, | 1447 int *table, |
1348 CONVERTER convert, | 1448 CONVERTER convert, |
1349 void *userData) | 1449 void *userData) |
1350 { | 1450 { |
1351 int i; | 1451 int i; |
1352 struct unknown_encoding *e = (struct unknown_encoding *)mem; | 1452 struct unknown_encoding *e = (struct unknown_encoding *)mem; |
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1496 static int | 1596 static int |
1497 initScan(const ENCODING * const *encodingTable, | 1597 initScan(const ENCODING * const *encodingTable, |
1498 const INIT_ENCODING *enc, | 1598 const INIT_ENCODING *enc, |
1499 int state, | 1599 int state, |
1500 const char *ptr, | 1600 const char *ptr, |
1501 const char *end, | 1601 const char *end, |
1502 const char **nextTokPtr) | 1602 const char **nextTokPtr) |
1503 { | 1603 { |
1504 const ENCODING **encPtr; | 1604 const ENCODING **encPtr; |
1505 | 1605 |
1506 if (ptr == end) | 1606 if (ptr >= end) |
1507 return XML_TOK_NONE; | 1607 return XML_TOK_NONE; |
1508 encPtr = enc->encPtr; | 1608 encPtr = enc->encPtr; |
1509 if (ptr + 1 == end) { | 1609 if (ptr + 1 == end) { |
1510 /* only a single byte available for auto-detection */ | 1610 /* only a single byte available for auto-detection */ |
1511 #ifndef XML_DTD /* FIXME */ | 1611 #ifndef XML_DTD /* FIXME */ |
1512 /* a well-formed document entity must have more than one byte */ | 1612 /* a well-formed document entity must have more than one byte */ |
1513 if (state != XML_CONTENT_STATE) | 1613 if (state != XML_CONTENT_STATE) |
1514 return XML_TOK_PARTIAL; | 1614 return XML_TOK_PARTIAL; |
1515 #endif | 1615 #endif |
1516 /* so we're parsing an external text entity... */ | 1616 /* so we're parsing an external text entity... */ |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1642 CONVERTER convert, | 1742 CONVERTER convert, |
1643 void *userData) | 1743 void *userData) |
1644 { | 1744 { |
1645 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); | 1745 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); |
1646 if (enc) | 1746 if (enc) |
1647 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; | 1747 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; |
1648 return enc; | 1748 return enc; |
1649 } | 1749 } |
1650 | 1750 |
1651 #endif /* XML_NS */ | 1751 #endif /* XML_NS */ |
OLD | NEW |