| OLD | NEW |
| 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd | 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
| 2 See the file COPYING for copying permission. | 2 See the file COPYING for copying permission. |
| 3 */ | 3 */ |
| 4 | 4 |
| 5 #include <stddef.h> | 5 #include <stddef.h> |
| 6 | 6 |
| 7 #ifdef COMPILED_FROM_DSP | 7 #ifdef WIN32 |
| 8 #include "winconfig.h" | 8 #include "winconfig.h" |
| 9 #elif defined(MACOS_CLASSIC) | 9 #elif defined(MACOS_CLASSIC) |
| 10 #include "macconfig.h" | 10 #include "macconfig.h" |
| 11 #elif defined(__amigaos__) | 11 #elif defined(__amigaos__) |
| 12 #include "amigaconfig.h" | 12 #include "amigaconfig.h" |
| 13 #elif defined(__WATCOMC__) | 13 #elif defined(__WATCOMC__) |
| 14 #include "watcomconfig.h" | 14 #include "watcomconfig.h" |
| 15 #else | 15 #else |
| 16 #ifdef HAVE_EXPAT_CONFIG_H | 16 #ifdef HAVE_EXPAT_CONFIG_H |
| 17 #include <expat_config.h> | 17 #include <expat_config.h> |
| 18 #endif | 18 #endif |
| 19 #endif /* ndef COMPILED_FROM_DSP */ | 19 #endif /* ndef WIN32 */ |
| 20 | 20 |
| 21 #include "expat_external.h" | 21 #include "expat_external.h" |
| 22 #include "internal.h" | 22 #include "internal.h" |
| 23 #include "xmltok.h" | 23 #include "xmltok.h" |
| 24 #include "nametab.h" | 24 #include "nametab.h" |
| 25 | 25 |
| 26 #ifdef XML_DTD | 26 #ifdef XML_DTD |
| 27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) | 27 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) |
| 28 #else | 28 #else |
| 29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */ | 29 #define IGNORE_SECTION_TOK_VTABLE /* as nothing */ |
| 30 #endif | 30 #endif |
| 31 | 31 |
| 32 #define VTABLE1 \ | 32 #define VTABLE1 \ |
| 33 { PREFIX(prologTok), PREFIX(contentTok), \ | 33 { PREFIX(prologTok), PREFIX(contentTok), \ |
| 34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ | 34 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ |
| 35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ | 35 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ |
| 36 PREFIX(sameName), \ | 36 PREFIX(sameName), \ |
| 37 PREFIX(nameMatchesAscii), \ | 37 PREFIX(nameMatchesAscii), \ |
| 38 PREFIX(nameLength), \ | 38 PREFIX(nameLength), \ |
| 39 PREFIX(skipS), \ | 39 PREFIX(skipS), \ |
| 40 PREFIX(getAtts), \ | 40 PREFIX(getAtts), \ |
| 41 PREFIX(charRefNumber), \ | 41 PREFIX(charRefNumber), \ |
| 42 PREFIX(predefinedEntityName), \ | 42 PREFIX(predefinedEntityName), \ |
| 43 PREFIX(updatePosition), \ | 43 PREFIX(updatePosition), \ |
| 44 PREFIX(isPublicId) | 44 PREFIX(isPublicId) |
| 45 | 45 |
| 46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) | 46 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) |
| 47 | 47 |
| 48 #define UCS2_GET_NAMING(pages, hi, lo) \ | 48 #define UCS2_GET_NAMING(pages, hi, lo) \ |
| 49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) | 49 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) |
| 50 | 50 |
| 51 /* A 2 byte UTF-8 representation splits the characters 11 bits between | 51 /* A 2 byte UTF-8 representation splits the characters 11 bits between |
| 52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into | 52 the bottom 5 and 6 bits of the bytes. We need 8 bits to index into |
| 53 pages, 3 bits to add to that index and 5 bits to generate the mask. | 53 pages, 3 bits to add to that index and 5 bits to generate the mask. |
| 54 */ | 54 */ |
| 55 #define UTF8_GET_NAMING2(pages, byte) \ | 55 #define UTF8_GET_NAMING2(pages, byte) \ |
| 56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ | 56 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ |
| 57 + ((((byte)[0]) & 3) << 1) \ | 57 + ((((byte)[0]) & 3) << 1) \ |
| 58 + ((((byte)[1]) >> 5) & 1)] \ | 58 + ((((byte)[1]) >> 5) & 1)] \ |
| 59 & (1 << (((byte)[1]) & 0x1F))) | 59 & (1u << (((byte)[1]) & 0x1F))) |
| 60 | 60 |
| 61 /* A 3 byte UTF-8 representation splits the characters 16 bits between | 61 /* A 3 byte UTF-8 representation splits the characters 16 bits between |
| 62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index | 62 the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index |
| 63 into pages, 3 bits to add to that index and 5 bits to generate the | 63 into pages, 3 bits to add to that index and 5 bits to generate the |
| 64 mask. | 64 mask. |
| 65 */ | 65 */ |
| 66 #define UTF8_GET_NAMING3(pages, byte) \ | 66 #define UTF8_GET_NAMING3(pages, byte) \ |
| 67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ | 67 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ |
| 68 + ((((byte)[1]) >> 2) & 0xF)] \ | 68 + ((((byte)[1]) >> 2) & 0xF)] \ |
| 69 << 3) \ | 69 << 3) \ |
| 70 + ((((byte)[1]) & 3) << 1) \ | 70 + ((((byte)[1]) & 3) << 1) \ |
| 71 + ((((byte)[2]) >> 5) & 1)] \ | 71 + ((((byte)[2]) >> 5) & 1)] \ |
| 72 & (1 << (((byte)[2]) & 0x1F))) | 72 & (1u << (((byte)[2]) & 0x1F))) |
| 73 | 73 |
| 74 #define UTF8_GET_NAMING(pages, p, n) \ | 74 #define UTF8_GET_NAMING(pages, p, n) \ |
| 75 ((n) == 2 \ | 75 ((n) == 2 \ |
| 76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ | 76 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ |
| 77 : ((n) == 3 \ | 77 : ((n) == 3 \ |
| 78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ | 78 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ |
| 79 : 0)) | 79 : 0)) |
| 80 | 80 |
| 81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B | 81 /* Detection of invalid UTF-8 sequences is based on Table 3.1B |
| 82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ | 82 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 115 || \ | 115 || \ |
| 116 ((*p) == 0xF0 \ | 116 ((*p) == 0xF0 \ |
| 117 ? \ | 117 ? \ |
| 118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ | 118 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ |
| 119 : \ | 119 : \ |
| 120 ((p)[1] & 0x80) == 0 \ | 120 ((p)[1] & 0x80) == 0 \ |
| 121 || \ | 121 || \ |
| 122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) | 122 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) |
| 123 | 123 |
| 124 static int PTRFASTCALL | 124 static int PTRFASTCALL |
| 125 isNever(const ENCODING *enc, const char *p) | 125 isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p)) |
| 126 { | 126 { |
| 127 return 0; | 127 return 0; |
| 128 } | 128 } |
| 129 | 129 |
| 130 static int PTRFASTCALL | 130 static int PTRFASTCALL |
| 131 utf8_isName2(const ENCODING *enc, const char *p) | 131 utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p) |
| 132 { | 132 { |
| 133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); | 133 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); |
| 134 } | 134 } |
| 135 | 135 |
| 136 static int PTRFASTCALL | 136 static int PTRFASTCALL |
| 137 utf8_isName3(const ENCODING *enc, const char *p) | 137 utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p) |
| 138 { | 138 { |
| 139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); | 139 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); |
| 140 } | 140 } |
| 141 | 141 |
| 142 #define utf8_isName4 isNever | 142 #define utf8_isName4 isNever |
| 143 | 143 |
| 144 static int PTRFASTCALL | 144 static int PTRFASTCALL |
| 145 utf8_isNmstrt2(const ENCODING *enc, const char *p) | 145 utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p) |
| 146 { | 146 { |
| 147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); | 147 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); |
| 148 } | 148 } |
| 149 | 149 |
| 150 static int PTRFASTCALL | 150 static int PTRFASTCALL |
| 151 utf8_isNmstrt3(const ENCODING *enc, const char *p) | 151 utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p) |
| 152 { | 152 { |
| 153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); | 153 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); |
| 154 } | 154 } |
| 155 | 155 |
| 156 #define utf8_isNmstrt4 isNever | 156 #define utf8_isNmstrt4 isNever |
| 157 | 157 |
| 158 static int PTRFASTCALL | 158 static int PTRFASTCALL |
| 159 utf8_isInvalid2(const ENCODING *enc, const char *p) | 159 utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p) |
| 160 { | 160 { |
| 161 return UTF8_INVALID2((const unsigned char *)p); | 161 return UTF8_INVALID2((const unsigned char *)p); |
| 162 } | 162 } |
| 163 | 163 |
| 164 static int PTRFASTCALL | 164 static int PTRFASTCALL |
| 165 utf8_isInvalid3(const ENCODING *enc, const char *p) | 165 utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p) |
| 166 { | 166 { |
| 167 return UTF8_INVALID3((const unsigned char *)p); | 167 return UTF8_INVALID3((const unsigned char *)p); |
| 168 } | 168 } |
| 169 | 169 |
| 170 static int PTRFASTCALL | 170 static int PTRFASTCALL |
| 171 utf8_isInvalid4(const ENCODING *enc, const char *p) | 171 utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p) |
| 172 { | 172 { |
| 173 return UTF8_INVALID4((const unsigned char *)p); | 173 return UTF8_INVALID4((const unsigned char *)p); |
| 174 } | 174 } |
| 175 | 175 |
| 176 struct normal_encoding { | 176 struct normal_encoding { |
| 177 ENCODING enc; | 177 ENCODING enc; |
| 178 unsigned char type[256]; | 178 unsigned char type[256]; |
| 179 #ifdef XML_MIN_SIZE | 179 #ifdef XML_MIN_SIZE |
| 180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *); | 180 int (PTRFASTCALL *byteType)(const ENCODING *, const char *); |
| 181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); | 181 int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 215 E ## isName2, \ | 215 E ## isName2, \ |
| 216 E ## isName3, \ | 216 E ## isName3, \ |
| 217 E ## isName4, \ | 217 E ## isName4, \ |
| 218 E ## isNmstrt2, \ | 218 E ## isNmstrt2, \ |
| 219 E ## isNmstrt3, \ | 219 E ## isNmstrt3, \ |
| 220 E ## isNmstrt4, \ | 220 E ## isNmstrt4, \ |
| 221 E ## isInvalid2, \ | 221 E ## isInvalid2, \ |
| 222 E ## isInvalid3, \ | 222 E ## isInvalid3, \ |
| 223 E ## isInvalid4 | 223 E ## isInvalid4 |
| 224 | 224 |
| 225 #define NULL_VTABLE \ |
| 226 /* isName2 */ NULL, \ |
| 227 /* isName3 */ NULL, \ |
| 228 /* isName4 */ NULL, \ |
| 229 /* isNmstrt2 */ NULL, \ |
| 230 /* isNmstrt3 */ NULL, \ |
| 231 /* isNmstrt4 */ NULL, \ |
| 232 /* isInvalid2 */ NULL, \ |
| 233 /* isInvalid3 */ NULL, \ |
| 234 /* isInvalid4 */ NULL |
| 235 |
| 225 static int FASTCALL checkCharRefNumber(int); | 236 static int FASTCALL checkCharRefNumber(int); |
| 226 | 237 |
| 227 #include "xmltok_impl.h" | 238 #include "xmltok_impl.h" |
| 228 #include "ascii.h" | 239 #include "ascii.h" |
| 229 | 240 |
| 230 #ifdef XML_MIN_SIZE | 241 #ifdef XML_MIN_SIZE |
| 231 #define sb_isNameMin isNever | 242 #define sb_isNameMin isNever |
| 232 #define sb_isNmstrtMin isNever | 243 #define sb_isNmstrtMin isNever |
| 233 #endif | 244 #endif |
| 234 | 245 |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 311 #undef IS_NMSTRT_CHAR_MINBPC | 322 #undef IS_NMSTRT_CHAR_MINBPC |
| 312 #undef IS_INVALID_CHAR | 323 #undef IS_INVALID_CHAR |
| 313 | 324 |
| 314 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ | 325 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ |
| 315 UTF8_cval1 = 0x00, | 326 UTF8_cval1 = 0x00, |
| 316 UTF8_cval2 = 0xc0, | 327 UTF8_cval2 = 0xc0, |
| 317 UTF8_cval3 = 0xe0, | 328 UTF8_cval3 = 0xe0, |
| 318 UTF8_cval4 = 0xf0 | 329 UTF8_cval4 = 0xf0 |
| 319 }; | 330 }; |
| 320 | 331 |
| 321 static void PTRCALL | 332 void |
| 322 utf8_toUtf8(const ENCODING *enc, | 333 align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef) |
| 334 { |
| 335 const char * fromLim = *fromLimRef; |
| 336 size_t walked = 0; |
| 337 for (; fromLim > from; fromLim--, walked++) { |
| 338 const unsigned char prev = (unsigned char)fromLim[-1]; |
| 339 if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ |
| 340 if (walked + 1 >= 4) { |
| 341 fromLim += 4 - 1; |
| 342 break; |
| 343 } else { |
| 344 walked = 0; |
| 345 } |
| 346 } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ |
| 347 if (walked + 1 >= 3) { |
| 348 fromLim += 3 - 1; |
| 349 break; |
| 350 } else { |
| 351 walked = 0; |
| 352 } |
| 353 } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ |
| 354 if (walked + 1 >= 2) { |
| 355 fromLim += 2 - 1; |
| 356 break; |
| 357 } else { |
| 358 walked = 0; |
| 359 } |
| 360 } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ |
| 361 break; |
| 362 } |
| 363 } |
| 364 *fromLimRef = fromLim; |
| 365 } |
| 366 |
| 367 static enum XML_Convert_Result PTRCALL |
| 368 utf8_toUtf8(const ENCODING *UNUSED_P(enc), |
| 323 const char **fromP, const char *fromLim, | 369 const char **fromP, const char *fromLim, |
| 324 char **toP, const char *toLim) | 370 char **toP, const char *toLim) |
| 325 { | 371 { |
| 372 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; |
| 326 char *to; | 373 char *to; |
| 327 const char *from; | 374 const char *from; |
| 328 if (fromLim - *fromP > toLim - *toP) { | 375 if (fromLim - *fromP > toLim - *toP) { |
| 329 /* Avoid copying partial characters. */ | 376 /* Avoid copying partial characters. */ |
| 330 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) | 377 res = XML_CONVERT_OUTPUT_EXHAUSTED; |
| 331 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) | 378 fromLim = *fromP + (toLim - *toP); |
| 332 break; | 379 align_limit_to_full_utf8_characters(*fromP, &fromLim); |
| 333 } | 380 } |
| 334 for (to = *toP, from = *fromP; from != fromLim; from++, to++) | 381 for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) |
| 335 *to = *from; | 382 *to = *from; |
| 336 *fromP = from; | 383 *fromP = from; |
| 337 *toP = to; | 384 *toP = to; |
| 385 |
| 386 if ((to == toLim) && (from < fromLim)) |
| 387 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 388 else |
| 389 return res; |
| 338 } | 390 } |
| 339 | 391 |
| 340 static void PTRCALL | 392 static enum XML_Convert_Result PTRCALL |
| 341 utf8_toUtf16(const ENCODING *enc, | 393 utf8_toUtf16(const ENCODING *enc, |
| 342 const char **fromP, const char *fromLim, | 394 const char **fromP, const char *fromLim, |
| 343 unsigned short **toP, const unsigned short *toLim) | 395 unsigned short **toP, const unsigned short *toLim) |
| 344 { | 396 { |
| 397 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; |
| 345 unsigned short *to = *toP; | 398 unsigned short *to = *toP; |
| 346 const char *from = *fromP; | 399 const char *from = *fromP; |
| 347 while (from != fromLim && to != toLim) { | 400 while (from < fromLim && to < toLim) { |
| 348 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { | 401 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { |
| 349 case BT_LEAD2: | 402 case BT_LEAD2: |
| 403 if (fromLim - from < 2) { |
| 404 res = XML_CONVERT_INPUT_INCOMPLETE; |
| 405 break; |
| 406 } |
| 350 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); | 407 *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); |
| 351 from += 2; | 408 from += 2; |
| 352 break; | 409 break; |
| 353 case BT_LEAD3: | 410 case BT_LEAD3: |
| 411 if (fromLim - from < 3) { |
| 412 res = XML_CONVERT_INPUT_INCOMPLETE; |
| 413 break; |
| 414 } |
| 354 *to++ = (unsigned short)(((from[0] & 0xf) << 12) | 415 *to++ = (unsigned short)(((from[0] & 0xf) << 12) |
| 355 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); | 416 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); |
| 356 from += 3; | 417 from += 3; |
| 357 break; | 418 break; |
| 358 case BT_LEAD4: | 419 case BT_LEAD4: |
| 359 { | 420 { |
| 360 unsigned long n; | 421 unsigned long n; |
| 361 if (to + 1 == toLim) | 422 if (toLim - to < 2) { |
| 423 res = XML_CONVERT_OUTPUT_EXHAUSTED; |
| 362 goto after; | 424 goto after; |
| 425 } |
| 426 if (fromLim - from < 4) { |
| 427 res = XML_CONVERT_INPUT_INCOMPLETE; |
| 428 goto after; |
| 429 } |
| 363 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | 430 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) |
| 364 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); | 431 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); |
| 365 n -= 0x10000; | 432 n -= 0x10000; |
| 366 to[0] = (unsigned short)((n >> 10) | 0xD800); | 433 to[0] = (unsigned short)((n >> 10) | 0xD800); |
| 367 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); | 434 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); |
| 368 to += 2; | 435 to += 2; |
| 369 from += 4; | 436 from += 4; |
| 370 } | 437 } |
| 371 break; | 438 break; |
| 372 default: | 439 default: |
| 373 *to++ = *from++; | 440 *to++ = *from++; |
| 374 break; | 441 break; |
| 375 } | 442 } |
| 376 } | 443 } |
| 377 after: | 444 after: |
| 378 *fromP = from; | 445 *fromP = from; |
| 379 *toP = to; | 446 *toP = to; |
| 447 return res; |
| 380 } | 448 } |
| 381 | 449 |
| 382 #ifdef XML_NS | 450 #ifdef XML_NS |
| 383 static const struct normal_encoding utf8_encoding_ns = { | 451 static const struct normal_encoding utf8_encoding_ns = { |
| 384 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, | 452 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, |
| 385 { | 453 { |
| 386 #include "asciitab.h" | 454 #include "asciitab.h" |
| 387 #include "utf8tab.h" | 455 #include "utf8tab.h" |
| 388 }, | 456 }, |
| 389 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) | 457 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) |
| (...skipping 28 matching lines...) Expand all Loading... |
| 418 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, | 486 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, |
| 419 { | 487 { |
| 420 #define BT_COLON BT_NMSTRT | 488 #define BT_COLON BT_NMSTRT |
| 421 #include "iasciitab.h" | 489 #include "iasciitab.h" |
| 422 #undef BT_COLON | 490 #undef BT_COLON |
| 423 #include "utf8tab.h" | 491 #include "utf8tab.h" |
| 424 }, | 492 }, |
| 425 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) | 493 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) |
| 426 }; | 494 }; |
| 427 | 495 |
| 428 static void PTRCALL | 496 static enum XML_Convert_Result PTRCALL |
| 429 latin1_toUtf8(const ENCODING *enc, | 497 latin1_toUtf8(const ENCODING *UNUSED_P(enc), |
| 430 const char **fromP, const char *fromLim, | 498 const char **fromP, const char *fromLim, |
| 431 char **toP, const char *toLim) | 499 char **toP, const char *toLim) |
| 432 { | 500 { |
| 433 for (;;) { | 501 for (;;) { |
| 434 unsigned char c; | 502 unsigned char c; |
| 435 if (*fromP == fromLim) | 503 if (*fromP == fromLim) |
| 436 break; | 504 return XML_CONVERT_COMPLETED; |
| 437 c = (unsigned char)**fromP; | 505 c = (unsigned char)**fromP; |
| 438 if (c & 0x80) { | 506 if (c & 0x80) { |
| 439 if (toLim - *toP < 2) | 507 if (toLim - *toP < 2) |
| 440 break; | 508 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 441 *(*toP)++ = (char)((c >> 6) | UTF8_cval2); | 509 *(*toP)++ = (char)((c >> 6) | UTF8_cval2); |
| 442 *(*toP)++ = (char)((c & 0x3f) | 0x80); | 510 *(*toP)++ = (char)((c & 0x3f) | 0x80); |
| 443 (*fromP)++; | 511 (*fromP)++; |
| 444 } | 512 } |
| 445 else { | 513 else { |
| 446 if (*toP == toLim) | 514 if (*toP == toLim) |
| 447 break; | 515 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 448 *(*toP)++ = *(*fromP)++; | 516 *(*toP)++ = *(*fromP)++; |
| 449 } | 517 } |
| 450 } | 518 } |
| 451 } | 519 } |
| 452 | 520 |
| 453 static void PTRCALL | 521 static enum XML_Convert_Result PTRCALL |
| 454 latin1_toUtf16(const ENCODING *enc, | 522 latin1_toUtf16(const ENCODING *UNUSED_P(enc), |
| 455 const char **fromP, const char *fromLim, | 523 const char **fromP, const char *fromLim, |
| 456 unsigned short **toP, const unsigned short *toLim) | 524 unsigned short **toP, const unsigned short *toLim) |
| 457 { | 525 { |
| 458 while (*fromP != fromLim && *toP != toLim) | 526 while (*fromP < fromLim && *toP < toLim) |
| 459 *(*toP)++ = (unsigned char)*(*fromP)++; | 527 *(*toP)++ = (unsigned char)*(*fromP)++; |
| 528 |
| 529 if ((*toP == toLim) && (*fromP < fromLim)) |
| 530 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 531 else |
| 532 return XML_CONVERT_COMPLETED; |
| 460 } | 533 } |
| 461 | 534 |
| 462 #ifdef XML_NS | 535 #ifdef XML_NS |
| 463 | 536 |
| 464 static const struct normal_encoding latin1_encoding_ns = { | 537 static const struct normal_encoding latin1_encoding_ns = { |
| 465 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, | 538 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, |
| 466 { | 539 { |
| 467 #include "asciitab.h" | 540 #include "asciitab.h" |
| 468 #include "latin1tab.h" | 541 #include "latin1tab.h" |
| 469 }, | 542 }, |
| 470 STANDARD_VTABLE(sb_) | 543 STANDARD_VTABLE(sb_) NULL_VTABLE |
| 471 }; | 544 }; |
| 472 | 545 |
| 473 #endif | 546 #endif |
| 474 | 547 |
| 475 static const struct normal_encoding latin1_encoding = { | 548 static const struct normal_encoding latin1_encoding = { |
| 476 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, | 549 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, |
| 477 { | 550 { |
| 478 #define BT_COLON BT_NMSTRT | 551 #define BT_COLON BT_NMSTRT |
| 479 #include "asciitab.h" | 552 #include "asciitab.h" |
| 480 #undef BT_COLON | 553 #undef BT_COLON |
| 481 #include "latin1tab.h" | 554 #include "latin1tab.h" |
| 482 }, | 555 }, |
| 483 STANDARD_VTABLE(sb_) | 556 STANDARD_VTABLE(sb_) NULL_VTABLE |
| 484 }; | 557 }; |
| 485 | 558 |
| 486 static void PTRCALL | 559 static enum XML_Convert_Result PTRCALL |
| 487 ascii_toUtf8(const ENCODING *enc, | 560 ascii_toUtf8(const ENCODING *UNUSED_P(enc), |
| 488 const char **fromP, const char *fromLim, | 561 const char **fromP, const char *fromLim, |
| 489 char **toP, const char *toLim) | 562 char **toP, const char *toLim) |
| 490 { | 563 { |
| 491 while (*fromP != fromLim && *toP != toLim) | 564 while (*fromP < fromLim && *toP < toLim) |
| 492 *(*toP)++ = *(*fromP)++; | 565 *(*toP)++ = *(*fromP)++; |
| 566 |
| 567 if ((*toP == toLim) && (*fromP < fromLim)) |
| 568 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 569 else |
| 570 return XML_CONVERT_COMPLETED; |
| 493 } | 571 } |
| 494 | 572 |
| 495 #ifdef XML_NS | 573 #ifdef XML_NS |
| 496 | 574 |
| 497 static const struct normal_encoding ascii_encoding_ns = { | 575 static const struct normal_encoding ascii_encoding_ns = { |
| 498 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, | 576 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, |
| 499 { | 577 { |
| 500 #include "asciitab.h" | 578 #include "asciitab.h" |
| 501 /* BT_NONXML == 0 */ | 579 /* BT_NONXML == 0 */ |
| 502 }, | 580 }, |
| 503 STANDARD_VTABLE(sb_) | 581 STANDARD_VTABLE(sb_) NULL_VTABLE |
| 504 }; | 582 }; |
| 505 | 583 |
| 506 #endif | 584 #endif |
| 507 | 585 |
| 508 static const struct normal_encoding ascii_encoding = { | 586 static const struct normal_encoding ascii_encoding = { |
| 509 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, | 587 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, |
| 510 { | 588 { |
| 511 #define BT_COLON BT_NMSTRT | 589 #define BT_COLON BT_NMSTRT |
| 512 #include "asciitab.h" | 590 #include "asciitab.h" |
| 513 #undef BT_COLON | 591 #undef BT_COLON |
| 514 /* BT_NONXML == 0 */ | 592 /* BT_NONXML == 0 */ |
| 515 }, | 593 }, |
| 516 STANDARD_VTABLE(sb_) | 594 STANDARD_VTABLE(sb_) NULL_VTABLE |
| 517 }; | 595 }; |
| 518 | 596 |
| 519 static int PTRFASTCALL | 597 static int PTRFASTCALL |
| 520 unicode_byte_type(char hi, char lo) | 598 unicode_byte_type(char hi, char lo) |
| 521 { | 599 { |
| 522 switch ((unsigned char)hi) { | 600 switch ((unsigned char)hi) { |
| 523 case 0xD8: case 0xD9: case 0xDA: case 0xDB: | 601 case 0xD8: case 0xD9: case 0xDA: case 0xDB: |
| 524 return BT_LEAD4; | 602 return BT_LEAD4; |
| 525 case 0xDC: case 0xDD: case 0xDE: case 0xDF: | 603 case 0xDC: case 0xDD: case 0xDE: case 0xDF: |
| 526 return BT_TRAIL; | 604 return BT_TRAIL; |
| 527 case 0xFF: | 605 case 0xFF: |
| 528 switch ((unsigned char)lo) { | 606 switch ((unsigned char)lo) { |
| 529 case 0xFF: | 607 case 0xFF: |
| 530 case 0xFE: | 608 case 0xFE: |
| 531 return BT_NONXML; | 609 return BT_NONXML; |
| 532 } | 610 } |
| 533 break; | 611 break; |
| 534 } | 612 } |
| 535 return BT_NONASCII; | 613 return BT_NONASCII; |
| 536 } | 614 } |
| 537 | 615 |
| 538 #define DEFINE_UTF16_TO_UTF8(E) \ | 616 #define DEFINE_UTF16_TO_UTF8(E) \ |
| 539 static void PTRCALL \ | 617 static enum XML_Convert_Result PTRCALL \ |
| 540 E ## toUtf8(const ENCODING *enc, \ | 618 E ## toUtf8(const ENCODING *UNUSED_P(enc), \ |
| 541 const char **fromP, const char *fromLim, \ | 619 const char **fromP, const char *fromLim, \ |
| 542 char **toP, const char *toLim) \ | 620 char **toP, const char *toLim) \ |
| 543 { \ | 621 { \ |
| 544 const char *from; \ | 622 const char *from = *fromP; \ |
| 545 for (from = *fromP; from != fromLim; from += 2) { \ | 623 fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ |
| 624 for (; from < fromLim; from += 2) { \ |
| 546 int plane; \ | 625 int plane; \ |
| 547 unsigned char lo2; \ | 626 unsigned char lo2; \ |
| 548 unsigned char lo = GET_LO(from); \ | 627 unsigned char lo = GET_LO(from); \ |
| 549 unsigned char hi = GET_HI(from); \ | 628 unsigned char hi = GET_HI(from); \ |
| 550 switch (hi) { \ | 629 switch (hi) { \ |
| 551 case 0: \ | 630 case 0: \ |
| 552 if (lo < 0x80) { \ | 631 if (lo < 0x80) { \ |
| 553 if (*toP == toLim) { \ | 632 if (*toP == toLim) { \ |
| 554 *fromP = from; \ | 633 *fromP = from; \ |
| 555 return; \ | 634 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
| 556 } \ | 635 } \ |
| 557 *(*toP)++ = lo; \ | 636 *(*toP)++ = lo; \ |
| 558 break; \ | 637 break; \ |
| 559 } \ | 638 } \ |
| 560 /* fall through */ \ | 639 /* fall through */ \ |
| 561 case 0x1: case 0x2: case 0x3: \ | 640 case 0x1: case 0x2: case 0x3: \ |
| 562 case 0x4: case 0x5: case 0x6: case 0x7: \ | 641 case 0x4: case 0x5: case 0x6: case 0x7: \ |
| 563 if (toLim - *toP < 2) { \ | 642 if (toLim - *toP < 2) { \ |
| 564 *fromP = from; \ | 643 *fromP = from; \ |
| 565 return; \ | 644 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
| 566 } \ | 645 } \ |
| 567 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ | 646 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ |
| 568 *(*toP)++ = ((lo & 0x3f) | 0x80); \ | 647 *(*toP)++ = ((lo & 0x3f) | 0x80); \ |
| 569 break; \ | 648 break; \ |
| 570 default: \ | 649 default: \ |
| 571 if (toLim - *toP < 3) { \ | 650 if (toLim - *toP < 3) { \ |
| 572 *fromP = from; \ | 651 *fromP = from; \ |
| 573 return; \ | 652 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
| 574 } \ | 653 } \ |
| 575 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ | 654 /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ |
| 576 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ | 655 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ |
| 577 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ | 656 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ |
| 578 *(*toP)++ = ((lo & 0x3f) | 0x80); \ | 657 *(*toP)++ = ((lo & 0x3f) | 0x80); \ |
| 579 break; \ | 658 break; \ |
| 580 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ | 659 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ |
| 581 if (toLim - *toP < 4) { \ | 660 if (toLim - *toP < 4) { \ |
| 582 *fromP = from; \ | 661 *fromP = from; \ |
| 583 return; \ | 662 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
| 663 } \ |
| 664 if (fromLim - from < 4) { \ |
| 665 *fromP = from; \ |
| 666 return XML_CONVERT_INPUT_INCOMPLETE; \ |
| 584 } \ | 667 } \ |
| 585 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ | 668 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ |
| 586 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ | 669 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ |
| 587 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ | 670 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ |
| 588 from += 2; \ | 671 from += 2; \ |
| 589 lo2 = GET_LO(from); \ | 672 lo2 = GET_LO(from); \ |
| 590 *(*toP)++ = (((lo & 0x3) << 4) \ | 673 *(*toP)++ = (((lo & 0x3) << 4) \ |
| 591 | ((GET_HI(from) & 0x3) << 2) \ | 674 | ((GET_HI(from) & 0x3) << 2) \ |
| 592 | (lo2 >> 6) \ | 675 | (lo2 >> 6) \ |
| 593 | 0x80); \ | 676 | 0x80); \ |
| 594 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ | 677 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ |
| 595 break; \ | 678 break; \ |
| 596 } \ | 679 } \ |
| 597 } \ | 680 } \ |
| 598 *fromP = from; \ | 681 *fromP = from; \ |
| 682 if (from < fromLim) \ |
| 683 return XML_CONVERT_INPUT_INCOMPLETE; \ |
| 684 else \ |
| 685 return XML_CONVERT_COMPLETED; \ |
| 599 } | 686 } |
| 600 | 687 |
| 601 #define DEFINE_UTF16_TO_UTF16(E) \ | 688 #define DEFINE_UTF16_TO_UTF16(E) \ |
| 602 static void PTRCALL \ | 689 static enum XML_Convert_Result PTRCALL \ |
| 603 E ## toUtf16(const ENCODING *enc, \ | 690 E ## toUtf16(const ENCODING *UNUSED_P(enc), \ |
| 604 const char **fromP, const char *fromLim, \ | 691 const char **fromP, const char *fromLim, \ |
| 605 unsigned short **toP, const unsigned short *toLim) \ | 692 unsigned short **toP, const unsigned short *toLim) \ |
| 606 { \ | 693 { \ |
| 694 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ |
| 695 fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ |
| 607 /* Avoid copying first half only of surrogate */ \ | 696 /* Avoid copying first half only of surrogate */ \ |
| 608 if (fromLim - *fromP > ((toLim - *toP) << 1) \ | 697 if (fromLim - *fromP > ((toLim - *toP) << 1) \ |
| 609 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ | 698 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ |
| 610 fromLim -= 2; \ | 699 fromLim -= 2; \ |
| 611 for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ | 700 res = XML_CONVERT_INPUT_INCOMPLETE; \ |
| 701 } \ |
| 702 for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ |
| 612 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ | 703 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ |
| 704 if ((*toP == toLim) && (*fromP < fromLim)) \ |
| 705 return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
| 706 else \ |
| 707 return res; \ |
| 613 } | 708 } |
| 614 | 709 |
| 615 #define SET2(ptr, ch) \ | 710 #define SET2(ptr, ch) \ |
| 616 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) | 711 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) |
| 617 #define GET_LO(ptr) ((unsigned char)(ptr)[0]) | 712 #define GET_LO(ptr) ((unsigned char)(ptr)[0]) |
| 618 #define GET_HI(ptr) ((unsigned char)(ptr)[1]) | 713 #define GET_HI(ptr) ((unsigned char)(ptr)[1]) |
| 619 | 714 |
| 620 DEFINE_UTF16_TO_UTF8(little2_) | 715 DEFINE_UTF16_TO_UTF8(little2_) |
| 621 DEFINE_UTF16_TO_UTF16(little2_) | 716 DEFINE_UTF16_TO_UTF16(little2_) |
| 622 | 717 |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 719 #if BYTEORDER == 1234 | 814 #if BYTEORDER == 1234 |
| 720 1 | 815 1 |
| 721 #else | 816 #else |
| 722 0 | 817 0 |
| 723 #endif | 818 #endif |
| 724 }, | 819 }, |
| 725 { | 820 { |
| 726 #include "asciitab.h" | 821 #include "asciitab.h" |
| 727 #include "latin1tab.h" | 822 #include "latin1tab.h" |
| 728 }, | 823 }, |
| 729 STANDARD_VTABLE(little2_) | 824 STANDARD_VTABLE(little2_) NULL_VTABLE |
| 730 }; | 825 }; |
| 731 | 826 |
| 732 #endif | 827 #endif |
| 733 | 828 |
| 734 static const struct normal_encoding little2_encoding = { | 829 static const struct normal_encoding little2_encoding = { |
| 735 { VTABLE, 2, 0, | 830 { VTABLE, 2, 0, |
| 736 #if BYTEORDER == 1234 | 831 #if BYTEORDER == 1234 |
| 737 1 | 832 1 |
| 738 #else | 833 #else |
| 739 0 | 834 0 |
| 740 #endif | 835 #endif |
| 741 }, | 836 }, |
| 742 { | 837 { |
| 743 #define BT_COLON BT_NMSTRT | 838 #define BT_COLON BT_NMSTRT |
| 744 #include "asciitab.h" | 839 #include "asciitab.h" |
| 745 #undef BT_COLON | 840 #undef BT_COLON |
| 746 #include "latin1tab.h" | 841 #include "latin1tab.h" |
| 747 }, | 842 }, |
| 748 STANDARD_VTABLE(little2_) | 843 STANDARD_VTABLE(little2_) NULL_VTABLE |
| 749 }; | 844 }; |
| 750 | 845 |
| 751 #if BYTEORDER != 4321 | 846 #if BYTEORDER != 4321 |
| 752 | 847 |
| 753 #ifdef XML_NS | 848 #ifdef XML_NS |
| 754 | 849 |
| 755 static const struct normal_encoding internal_little2_encoding_ns = { | 850 static const struct normal_encoding internal_little2_encoding_ns = { |
| 756 { VTABLE, 2, 0, 1 }, | 851 { VTABLE, 2, 0, 1 }, |
| 757 { | 852 { |
| 758 #include "iasciitab.h" | 853 #include "iasciitab.h" |
| 759 #include "latin1tab.h" | 854 #include "latin1tab.h" |
| 760 }, | 855 }, |
| 761 STANDARD_VTABLE(little2_) | 856 STANDARD_VTABLE(little2_) NULL_VTABLE |
| 762 }; | 857 }; |
| 763 | 858 |
| 764 #endif | 859 #endif |
| 765 | 860 |
| 766 static const struct normal_encoding internal_little2_encoding = { | 861 static const struct normal_encoding internal_little2_encoding = { |
| 767 { VTABLE, 2, 0, 1 }, | 862 { VTABLE, 2, 0, 1 }, |
| 768 { | 863 { |
| 769 #define BT_COLON BT_NMSTRT | 864 #define BT_COLON BT_NMSTRT |
| 770 #include "iasciitab.h" | 865 #include "iasciitab.h" |
| 771 #undef BT_COLON | 866 #undef BT_COLON |
| 772 #include "latin1tab.h" | 867 #include "latin1tab.h" |
| 773 }, | 868 }, |
| 774 STANDARD_VTABLE(little2_) | 869 STANDARD_VTABLE(little2_) NULL_VTABLE |
| 775 }; | 870 }; |
| 776 | 871 |
| 777 #endif | 872 #endif |
| 778 | 873 |
| 779 | 874 |
| 780 #define BIG2_BYTE_TYPE(enc, p) \ | 875 #define BIG2_BYTE_TYPE(enc, p) \ |
| 781 ((p)[0] == 0 \ | 876 ((p)[0] == 0 \ |
| 782 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ | 877 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ |
| 783 : unicode_byte_type((p)[0], (p)[1])) | 878 : unicode_byte_type((p)[0], (p)[1])) |
| 784 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) | 879 #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) |
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 860 #if BYTEORDER == 4321 | 955 #if BYTEORDER == 4321 |
| 861 1 | 956 1 |
| 862 #else | 957 #else |
| 863 0 | 958 0 |
| 864 #endif | 959 #endif |
| 865 }, | 960 }, |
| 866 { | 961 { |
| 867 #include "asciitab.h" | 962 #include "asciitab.h" |
| 868 #include "latin1tab.h" | 963 #include "latin1tab.h" |
| 869 }, | 964 }, |
| 870 STANDARD_VTABLE(big2_) | 965 STANDARD_VTABLE(big2_) NULL_VTABLE |
| 871 }; | 966 }; |
| 872 | 967 |
| 873 #endif | 968 #endif |
| 874 | 969 |
| 875 static const struct normal_encoding big2_encoding = { | 970 static const struct normal_encoding big2_encoding = { |
| 876 { VTABLE, 2, 0, | 971 { VTABLE, 2, 0, |
| 877 #if BYTEORDER == 4321 | 972 #if BYTEORDER == 4321 |
| 878 1 | 973 1 |
| 879 #else | 974 #else |
| 880 0 | 975 0 |
| 881 #endif | 976 #endif |
| 882 }, | 977 }, |
| 883 { | 978 { |
| 884 #define BT_COLON BT_NMSTRT | 979 #define BT_COLON BT_NMSTRT |
| 885 #include "asciitab.h" | 980 #include "asciitab.h" |
| 886 #undef BT_COLON | 981 #undef BT_COLON |
| 887 #include "latin1tab.h" | 982 #include "latin1tab.h" |
| 888 }, | 983 }, |
| 889 STANDARD_VTABLE(big2_) | 984 STANDARD_VTABLE(big2_) NULL_VTABLE |
| 890 }; | 985 }; |
| 891 | 986 |
| 892 #if BYTEORDER != 1234 | 987 #if BYTEORDER != 1234 |
| 893 | 988 |
| 894 #ifdef XML_NS | 989 #ifdef XML_NS |
| 895 | 990 |
| 896 static const struct normal_encoding internal_big2_encoding_ns = { | 991 static const struct normal_encoding internal_big2_encoding_ns = { |
| 897 { VTABLE, 2, 0, 1 }, | 992 { VTABLE, 2, 0, 1 }, |
| 898 { | 993 { |
| 899 #include "iasciitab.h" | 994 #include "iasciitab.h" |
| 900 #include "latin1tab.h" | 995 #include "latin1tab.h" |
| 901 }, | 996 }, |
| 902 STANDARD_VTABLE(big2_) | 997 STANDARD_VTABLE(big2_) NULL_VTABLE |
| 903 }; | 998 }; |
| 904 | 999 |
| 905 #endif | 1000 #endif |
| 906 | 1001 |
| 907 static const struct normal_encoding internal_big2_encoding = { | 1002 static const struct normal_encoding internal_big2_encoding = { |
| 908 { VTABLE, 2, 0, 1 }, | 1003 { VTABLE, 2, 0, 1 }, |
| 909 { | 1004 { |
| 910 #define BT_COLON BT_NMSTRT | 1005 #define BT_COLON BT_NMSTRT |
| 911 #include "iasciitab.h" | 1006 #include "iasciitab.h" |
| 912 #undef BT_COLON | 1007 #undef BT_COLON |
| 913 #include "latin1tab.h" | 1008 #include "latin1tab.h" |
| 914 }, | 1009 }, |
| 915 STANDARD_VTABLE(big2_) | 1010 STANDARD_VTABLE(big2_) NULL_VTABLE |
| 916 }; | 1011 }; |
| 917 | 1012 |
| 918 #endif | 1013 #endif |
| 919 | 1014 |
| 920 #undef PREFIX | 1015 #undef PREFIX |
| 921 | 1016 |
| 922 static int FASTCALL | 1017 static int FASTCALL |
| 923 streqci(const char *s1, const char *s2) | 1018 streqci(const char *s1, const char *s2) |
| 924 { | 1019 { |
| 925 for (;;) { | 1020 for (;;) { |
| 926 char c1 = *s1++; | 1021 char c1 = *s1++; |
| 927 char c2 = *s2++; | 1022 char c2 = *s2++; |
| 928 if (ASCII_a <= c1 && c1 <= ASCII_z) | 1023 if (ASCII_a <= c1 && c1 <= ASCII_z) |
| 929 c1 += ASCII_A - ASCII_a; | 1024 c1 += ASCII_A - ASCII_a; |
| 930 if (ASCII_a <= c2 && c2 <= ASCII_z) | 1025 if (ASCII_a <= c2 && c2 <= ASCII_z) |
| 931 c2 += ASCII_A - ASCII_a; | 1026 c2 += ASCII_A - ASCII_a; |
| 932 if (c1 != c2) | 1027 if (c1 != c2) |
| 933 return 0; | 1028 return 0; |
| 934 if (!c1) | 1029 if (!c1) |
| 935 break; | 1030 break; |
| 936 } | 1031 } |
| 937 return 1; | 1032 return 1; |
| 938 } | 1033 } |
| 939 | 1034 |
| 940 static void PTRCALL | 1035 static void PTRCALL |
| 941 initUpdatePosition(const ENCODING *enc, const char *ptr, | 1036 initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr, |
| 942 const char *end, POSITION *pos) | 1037 const char *end, POSITION *pos) |
| 943 { | 1038 { |
| 944 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); | 1039 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); |
| 945 } | 1040 } |
| 946 | 1041 |
| 947 static int | 1042 static int |
| 948 toAscii(const ENCODING *enc, const char *ptr, const char *end) | 1043 toAscii(const ENCODING *enc, const char *ptr, const char *end) |
| 949 { | 1044 { |
| 950 char buf[1]; | 1045 char buf[1]; |
| 951 char *p = buf; | 1046 char *p = buf; |
| (...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1281 } | 1376 } |
| 1282 | 1377 |
| 1283 static int PTRFASTCALL | 1378 static int PTRFASTCALL |
| 1284 unknown_isInvalid(const ENCODING *enc, const char *p) | 1379 unknown_isInvalid(const ENCODING *enc, const char *p) |
| 1285 { | 1380 { |
| 1286 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 1381 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
| 1287 int c = uenc->convert(uenc->userData, p); | 1382 int c = uenc->convert(uenc->userData, p); |
| 1288 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; | 1383 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; |
| 1289 } | 1384 } |
| 1290 | 1385 |
| 1291 static void PTRCALL | 1386 static enum XML_Convert_Result PTRCALL |
| 1292 unknown_toUtf8(const ENCODING *enc, | 1387 unknown_toUtf8(const ENCODING *enc, |
| 1293 const char **fromP, const char *fromLim, | 1388 const char **fromP, const char *fromLim, |
| 1294 char **toP, const char *toLim) | 1389 char **toP, const char *toLim) |
| 1295 { | 1390 { |
| 1296 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 1391 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
| 1297 char buf[XML_UTF8_ENCODE_MAX]; | 1392 char buf[XML_UTF8_ENCODE_MAX]; |
| 1298 for (;;) { | 1393 for (;;) { |
| 1299 const char *utf8; | 1394 const char *utf8; |
| 1300 int n; | 1395 int n; |
| 1301 if (*fromP == fromLim) | 1396 if (*fromP == fromLim) |
| 1302 break; | 1397 return XML_CONVERT_COMPLETED; |
| 1303 utf8 = uenc->utf8[(unsigned char)**fromP]; | 1398 utf8 = uenc->utf8[(unsigned char)**fromP]; |
| 1304 n = *utf8++; | 1399 n = *utf8++; |
| 1305 if (n == 0) { | 1400 if (n == 0) { |
| 1306 int c = uenc->convert(uenc->userData, *fromP); | 1401 int c = uenc->convert(uenc->userData, *fromP); |
| 1307 n = XmlUtf8Encode(c, buf); | 1402 n = XmlUtf8Encode(c, buf); |
| 1308 if (n > toLim - *toP) | 1403 if (n > toLim - *toP) |
| 1309 break; | 1404 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 1310 utf8 = buf; | 1405 utf8 = buf; |
| 1311 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] | 1406 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] |
| 1312 - (BT_LEAD2 - 2)); | 1407 - (BT_LEAD2 - 2)); |
| 1313 } | 1408 } |
| 1314 else { | 1409 else { |
| 1315 if (n > toLim - *toP) | 1410 if (n > toLim - *toP) |
| 1316 break; | 1411 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 1317 (*fromP)++; | 1412 (*fromP)++; |
| 1318 } | 1413 } |
| 1319 do { | 1414 do { |
| 1320 *(*toP)++ = *utf8++; | 1415 *(*toP)++ = *utf8++; |
| 1321 } while (--n != 0); | 1416 } while (--n != 0); |
| 1322 } | 1417 } |
| 1323 } | 1418 } |
| 1324 | 1419 |
| 1325 static void PTRCALL | 1420 static enum XML_Convert_Result PTRCALL |
| 1326 unknown_toUtf16(const ENCODING *enc, | 1421 unknown_toUtf16(const ENCODING *enc, |
| 1327 const char **fromP, const char *fromLim, | 1422 const char **fromP, const char *fromLim, |
| 1328 unsigned short **toP, const unsigned short *toLim) | 1423 unsigned short **toP, const unsigned short *toLim) |
| 1329 { | 1424 { |
| 1330 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); | 1425 const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
| 1331 while (*fromP != fromLim && *toP != toLim) { | 1426 while (*fromP < fromLim && *toP < toLim) { |
| 1332 unsigned short c = uenc->utf16[(unsigned char)**fromP]; | 1427 unsigned short c = uenc->utf16[(unsigned char)**fromP]; |
| 1333 if (c == 0) { | 1428 if (c == 0) { |
| 1334 c = (unsigned short) | 1429 c = (unsigned short) |
| 1335 uenc->convert(uenc->userData, *fromP); | 1430 uenc->convert(uenc->userData, *fromP); |
| 1336 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] | 1431 *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] |
| 1337 - (BT_LEAD2 - 2)); | 1432 - (BT_LEAD2 - 2)); |
| 1338 } | 1433 } |
| 1339 else | 1434 else |
| 1340 (*fromP)++; | 1435 (*fromP)++; |
| 1341 *(*toP)++ = c; | 1436 *(*toP)++ = c; |
| 1342 } | 1437 } |
| 1438 |
| 1439 if ((*toP == toLim) && (*fromP < fromLim)) |
| 1440 return XML_CONVERT_OUTPUT_EXHAUSTED; |
| 1441 else |
| 1442 return XML_CONVERT_COMPLETED; |
| 1343 } | 1443 } |
| 1344 | 1444 |
| 1345 ENCODING * | 1445 ENCODING * |
| 1346 XmlInitUnknownEncoding(void *mem, | 1446 XmlInitUnknownEncoding(void *mem, |
| 1347 int *table, | 1447 int *table, |
| 1348 CONVERTER convert, | 1448 CONVERTER convert, |
| 1349 void *userData) | 1449 void *userData) |
| 1350 { | 1450 { |
| 1351 int i; | 1451 int i; |
| 1352 struct unknown_encoding *e = (struct unknown_encoding *)mem; | 1452 struct unknown_encoding *e = (struct unknown_encoding *)mem; |
| (...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1496 static int | 1596 static int |
| 1497 initScan(const ENCODING * const *encodingTable, | 1597 initScan(const ENCODING * const *encodingTable, |
| 1498 const INIT_ENCODING *enc, | 1598 const INIT_ENCODING *enc, |
| 1499 int state, | 1599 int state, |
| 1500 const char *ptr, | 1600 const char *ptr, |
| 1501 const char *end, | 1601 const char *end, |
| 1502 const char **nextTokPtr) | 1602 const char **nextTokPtr) |
| 1503 { | 1603 { |
| 1504 const ENCODING **encPtr; | 1604 const ENCODING **encPtr; |
| 1505 | 1605 |
| 1506 if (ptr == end) | 1606 if (ptr >= end) |
| 1507 return XML_TOK_NONE; | 1607 return XML_TOK_NONE; |
| 1508 encPtr = enc->encPtr; | 1608 encPtr = enc->encPtr; |
| 1509 if (ptr + 1 == end) { | 1609 if (ptr + 1 == end) { |
| 1510 /* only a single byte available for auto-detection */ | 1610 /* only a single byte available for auto-detection */ |
| 1511 #ifndef XML_DTD /* FIXME */ | 1611 #ifndef XML_DTD /* FIXME */ |
| 1512 /* a well-formed document entity must have more than one byte */ | 1612 /* a well-formed document entity must have more than one byte */ |
| 1513 if (state != XML_CONTENT_STATE) | 1613 if (state != XML_CONTENT_STATE) |
| 1514 return XML_TOK_PARTIAL; | 1614 return XML_TOK_PARTIAL; |
| 1515 #endif | 1615 #endif |
| 1516 /* so we're parsing an external text entity... */ | 1616 /* so we're parsing an external text entity... */ |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1642 CONVERTER convert, | 1742 CONVERTER convert, |
| 1643 void *userData) | 1743 void *userData) |
| 1644 { | 1744 { |
| 1645 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); | 1745 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); |
| 1646 if (enc) | 1746 if (enc) |
| 1647 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; | 1747 ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; |
| 1648 return enc; | 1748 return enc; |
| 1649 } | 1749 } |
| 1650 | 1750 |
| 1651 #endif /* XML_NS */ | 1751 #endif /* XML_NS */ |
| OLD | NEW |