| OLD | NEW |
| 1 /* | 1 /* |
| 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the | 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the |
| 3 * XML and HTML parsers. | 3 * XML and HTML parsers. |
| 4 * | 4 * |
| 5 * See Copyright for the status of this software. | 5 * See Copyright for the status of this software. |
| 6 * | 6 * |
| 7 * daniel@veillard.com | 7 * daniel@veillard.com |
| 8 */ | 8 */ |
| 9 | 9 |
| 10 #define IN_LIBXML | 10 #define IN_LIBXML |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 48 #include <libxml/xmlIO.h> | 48 #include <libxml/xmlIO.h> |
| 49 #include <libxml/uri.h> | 49 #include <libxml/uri.h> |
| 50 #include <libxml/dict.h> | 50 #include <libxml/dict.h> |
| 51 #include <libxml/SAX.h> | 51 #include <libxml/SAX.h> |
| 52 #ifdef LIBXML_CATALOG_ENABLED | 52 #ifdef LIBXML_CATALOG_ENABLED |
| 53 #include <libxml/catalog.h> | 53 #include <libxml/catalog.h> |
| 54 #endif | 54 #endif |
| 55 #include <libxml/globals.h> | 55 #include <libxml/globals.h> |
| 56 #include <libxml/chvalid.h> | 56 #include <libxml/chvalid.h> |
| 57 | 57 |
| 58 #define CUR(ctxt) ctxt->input->cur |
| 59 #define END(ctxt) ctxt->input->end |
| 60 #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) |
| 61 |
| 58 #include "buf.h" | 62 #include "buf.h" |
| 59 #include "enc.h" | 63 #include "enc.h" |
| 60 | 64 |
| 61 /* | 65 /* |
| 62 * Various global defaults for parsing | 66 * Various global defaults for parsing |
| 63 */ | 67 */ |
| 64 | 68 |
| 65 /** | 69 /** |
| 66 * xmlCheckVersion: | 70 * xmlCheckVersion: |
| 67 * @version: the include version number | 71 * @version: the include version number |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 158 } | 162 } |
| 159 | 163 |
| 160 /** | 164 /** |
| 161 * xmlErrInternal: | 165 * xmlErrInternal: |
| 162 * @ctxt: an XML parser context | 166 * @ctxt: an XML parser context |
| 163 * @msg: the error message | 167 * @msg: the error message |
| 164 * @str: error information | 168 * @str: error information |
| 165 * | 169 * |
| 166 * Handle an internal error | 170 * Handle an internal error |
| 167 */ | 171 */ |
| 168 static void | 172 static void LIBXML_ATTR_FORMAT(2,0) |
| 169 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) | 173 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) |
| 170 { | 174 { |
| 171 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && | 175 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| 172 (ctxt->instate == XML_PARSER_EOF)) | 176 (ctxt->instate == XML_PARSER_EOF)) |
| 173 return; | 177 return; |
| 174 if (ctxt != NULL) | 178 if (ctxt != NULL) |
| 175 ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 179 ctxt->errNo = XML_ERR_INTERNAL_ERROR; |
| 176 __xmlRaiseError(NULL, NULL, NULL, | 180 __xmlRaiseError(NULL, NULL, NULL, |
| 177 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, | 181 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, |
| 178 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, | 182 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, |
| 179 0, 0, msg, str); | 183 0, 0, msg, str); |
| 180 if (ctxt != NULL) { | 184 if (ctxt != NULL) { |
| 181 ctxt->wellFormed = 0; | 185 ctxt->wellFormed = 0; |
| 182 if (ctxt->recovery == 0) | 186 if (ctxt->recovery == 0) |
| 183 ctxt->disableSAX = 1; | 187 ctxt->disableSAX = 1; |
| 184 } | 188 } |
| 185 } | 189 } |
| 186 | 190 |
| 187 /** | 191 /** |
| 188 * xmlErrEncodingInt: | 192 * xmlErrEncodingInt: |
| 189 * @ctxt: an XML parser context | 193 * @ctxt: an XML parser context |
| 190 * @error: the error number | 194 * @error: the error number |
| 191 * @msg: the error message | 195 * @msg: the error message |
| 192 * @val: an integer value | 196 * @val: an integer value |
| 193 * | 197 * |
| 194 * An encoding error | 198 * An encoding error |
| 195 */ | 199 */ |
| 196 static void | 200 static void LIBXML_ATTR_FORMAT(3,0) |
| 197 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, | 201 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| 198 const char *msg, int val) | 202 const char *msg, int val) |
| 199 { | 203 { |
| 200 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && | 204 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| 201 (ctxt->instate == XML_PARSER_EOF)) | 205 (ctxt->instate == XML_PARSER_EOF)) |
| 202 return; | 206 return; |
| 203 if (ctxt != NULL) | 207 if (ctxt != NULL) |
| 204 ctxt->errNo = error; | 208 ctxt->errNo = error; |
| 205 __xmlRaiseError(NULL, NULL, NULL, | 209 __xmlRaiseError(NULL, NULL, NULL, |
| 206 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, | 210 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 287 * @len: an indicative size for the lookahead | 291 * @len: an indicative size for the lookahead |
| 288 * | 292 * |
| 289 * This function increases the input for the parser. It tries to | 293 * This function increases the input for the parser. It tries to |
| 290 * preserve pointers to the input buffer, and keep already read data | 294 * preserve pointers to the input buffer, and keep already read data |
| 291 * | 295 * |
| 292 * Returns the number of chars read, or -1 in case of error; 0 indicates the | 296 * Returns the number of chars read, or -1 in case of error; 0 indicates the |
| 293 * end of this entity | 297 * end of this entity |
| 294 */ | 298 */ |
| 295 int | 299 int |
| 296 xmlParserInputGrow(xmlParserInputPtr in, int len) { | 300 xmlParserInputGrow(xmlParserInputPtr in, int len) { |
| 297 size_t ret; | 301 int ret; |
| 298 size_t indx; | 302 size_t indx; |
| 299 const xmlChar *content; | 303 const xmlChar *content; |
| 300 | 304 |
| 301 if ((in == NULL) || (len < 0)) return(-1); | 305 if ((in == NULL) || (len < 0)) return(-1); |
| 302 #ifdef DEBUG_INPUT | 306 #ifdef DEBUG_INPUT |
| 303 xmlGenericError(xmlGenericErrorContext, "Grow\n"); | 307 xmlGenericError(xmlGenericErrorContext, "Grow\n"); |
| 304 #endif | 308 #endif |
| 305 if (in->buf == NULL) return(-1); | 309 if (in->buf == NULL) return(-1); |
| 306 if (in->base == NULL) return(-1); | 310 if (in->base == NULL) return(-1); |
| 307 if (in->cur == NULL) return(-1); | 311 if (in->cur == NULL) return(-1); |
| (...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 415 * Skip to the next input char. | 419 * Skip to the next input char. |
| 416 */ | 420 */ |
| 417 | 421 |
| 418 void | 422 void |
| 419 xmlNextChar(xmlParserCtxtPtr ctxt) | 423 xmlNextChar(xmlParserCtxtPtr ctxt) |
| 420 { | 424 { |
| 421 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || | 425 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || |
| 422 (ctxt->input == NULL)) | 426 (ctxt->input == NULL)) |
| 423 return; | 427 return; |
| 424 | 428 |
| 429 if (!(VALID_CTXT(ctxt))) { |
| 430 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); |
| 431 ctxt->errNo = XML_ERR_INTERNAL_ERROR; |
| 432 xmlStopParser(ctxt); |
| 433 return; |
| 434 } |
| 435 |
| 436 if ((*ctxt->input->cur == 0) && |
| 437 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { |
| 438 if ((ctxt->instate != XML_PARSER_COMMENT)) |
| 439 xmlPopInput(ctxt); |
| 440 return; |
| 441 } |
| 442 |
| 425 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 443 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
| 426 if ((*ctxt->input->cur == 0) && | 444 const unsigned char *cur; |
| 427 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && | 445 unsigned char c; |
| 428 (ctxt->instate != XML_PARSER_COMMENT)) { | |
| 429 /* | |
| 430 * If we are at the end of the current entity and | |
| 431 * the context allows it, we pop consumed entities | |
| 432 * automatically. | |
| 433 * the auto closing should be blocked in other cases | |
| 434 */ | |
| 435 xmlPopInput(ctxt); | |
| 436 } else { | |
| 437 const unsigned char *cur; | |
| 438 unsigned char c; | |
| 439 | 446 |
| 440 /* | 447 /* |
| 441 * 2.11 End-of-Line Handling | 448 * 2.11 End-of-Line Handling |
| 442 * the literal two-character sequence "#xD#xA" or a standalone | 449 * the literal two-character sequence "#xD#xA" or a standalone |
| 443 * literal #xD, an XML processor must pass to the application | 450 * literal #xD, an XML processor must pass to the application |
| 444 * the single character #xA. | 451 * the single character #xA. |
| 445 */ | 452 */ |
| 446 if (*(ctxt->input->cur) == '\n') { | 453 if (*(ctxt->input->cur) == '\n') { |
| 447 ctxt->input->line++; ctxt->input->col = 1; | 454 ctxt->input->line++; ctxt->input->col = 1; |
| 448 } else | 455 } else |
| 449 ctxt->input->col++; | 456 ctxt->input->col++; |
| 450 | 457 |
| 451 /* | 458 /* |
| 452 * We are supposed to handle UTF8, check it's valid | 459 * We are supposed to handle UTF8, check it's valid |
| 453 * From rfc2044: encoding of the Unicode values on UTF-8: | 460 * From rfc2044: encoding of the Unicode values on UTF-8: |
| 454 * | 461 * |
| 455 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | 462 * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
| 456 * 0000 0000-0000 007F 0xxxxxxx | 463 * 0000 0000-0000 007F 0xxxxxxx |
| 457 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | 464 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
| 458 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | 465 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
| 459 * | 466 * |
| 460 * Check for the 0x110000 limit too | 467 * Check for the 0x110000 limit too |
| 461 */ | 468 */ |
| 462 cur = ctxt->input->cur; | 469 cur = ctxt->input->cur; |
| 463 | 470 |
| 464 c = *cur; | 471 c = *cur; |
| 465 if (c & 0x80) { | 472 if (c & 0x80) { |
| 466 » if (c == 0xC0) | 473 if (c == 0xC0) |
| 467 » » goto encoding_error; | 474 » goto encoding_error; |
| 468 if (cur[1] == 0) { | 475 if (cur[1] == 0) { |
| 476 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 477 cur = ctxt->input->cur; |
| 478 } |
| 479 if ((cur[1] & 0xc0) != 0x80) |
| 480 goto encoding_error; |
| 481 if ((c & 0xe0) == 0xe0) { |
| 482 unsigned int val; |
| 483 |
| 484 if (cur[2] == 0) { |
| 469 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 470 cur = ctxt->input->cur; | 486 cur = ctxt->input->cur; |
| 471 } | 487 } |
| 472 if ((cur[1] & 0xc0) != 0x80) | 488 if ((cur[2] & 0xc0) != 0x80) |
| 473 goto encoding_error; | 489 goto encoding_error; |
| 474 if ((c & 0xe0) == 0xe0) { | 490 if ((c & 0xf0) == 0xf0) { |
| 475 unsigned int val; | 491 if (cur[3] == 0) { |
| 476 | |
| 477 if (cur[2] == 0) { | |
| 478 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 492 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 479 cur = ctxt->input->cur; | 493 cur = ctxt->input->cur; |
| 480 } | 494 } |
| 481 if ((cur[2] & 0xc0) != 0x80) | 495 if (((c & 0xf8) != 0xf0) || |
| 496 ((cur[3] & 0xc0) != 0x80)) |
| 482 goto encoding_error; | 497 goto encoding_error; |
| 483 if ((c & 0xf0) == 0xf0) { | 498 /* 4-byte code */ |
| 484 if (cur[3] == 0) { | 499 ctxt->input->cur += 4; |
| 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 500 val = (cur[0] & 0x7) << 18; |
| 486 cur = ctxt->input->cur; | 501 val |= (cur[1] & 0x3f) << 12; |
| 487 } | 502 val |= (cur[2] & 0x3f) << 6; |
| 488 if (((c & 0xf8) != 0xf0) || | 503 val |= cur[3] & 0x3f; |
| 489 ((cur[3] & 0xc0) != 0x80)) | 504 } else { |
| 490 goto encoding_error; | 505 /* 3-byte code */ |
| 491 /* 4-byte code */ | 506 ctxt->input->cur += 3; |
| 492 ctxt->input->cur += 4; | 507 val = (cur[0] & 0xf) << 12; |
| 493 val = (cur[0] & 0x7) << 18; | 508 val |= (cur[1] & 0x3f) << 6; |
| 494 val |= (cur[1] & 0x3f) << 12; | 509 val |= cur[2] & 0x3f; |
| 495 val |= (cur[2] & 0x3f) << 6; | 510 } |
| 496 val |= cur[3] & 0x3f; | 511 if (((val > 0xd7ff) && (val < 0xe000)) || |
| 497 } else { | 512 ((val > 0xfffd) && (val < 0x10000)) || |
| 498 /* 3-byte code */ | 513 (val >= 0x110000)) { |
| 499 ctxt->input->cur += 3; | 514 » » xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, |
| 500 val = (cur[0] & 0xf) << 12; | 515 » » » » "Char 0x%X out of allowed range\n", |
| 501 val |= (cur[1] & 0x3f) << 6; | 516 » » » » val); |
| 502 val |= cur[2] & 0x3f; | 517 } |
| 503 } | |
| 504 if (((val > 0xd7ff) && (val < 0xe000)) || | |
| 505 ((val > 0xfffd) && (val < 0x10000)) || | |
| 506 (val >= 0x110000)) { | |
| 507 » » » xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, | |
| 508 » » » » » "Char 0x%X out of allowed range\n", | |
| 509 » » » » » val); | |
| 510 } | |
| 511 } else | |
| 512 /* 2-byte code */ | |
| 513 ctxt->input->cur += 2; | |
| 514 } else | 518 } else |
| 515 /* 1-byte code */ | 519 /* 2-byte code */ |
| 516 ctxt->input->cur++; | 520 ctxt->input->cur += 2; |
| 521 } else |
| 522 /* 1-byte code */ |
| 523 ctxt->input->cur++; |
| 517 | 524 |
| 518 ctxt->nbChars++; | 525 ctxt->nbChars++; |
| 519 if (*ctxt->input->cur == 0) | 526 if (*ctxt->input->cur == 0) |
| 520 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 527 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 521 } | |
| 522 } else { | 528 } else { |
| 523 /* | 529 /* |
| 524 * Assume it's a fixed length encoding (1) with | 530 * Assume it's a fixed length encoding (1) with |
| 525 * a compatible encoding for the ASCII set, since | 531 * a compatible encoding for the ASCII set, since |
| 526 * XML constructs only use < 128 chars | 532 * XML constructs only use < 128 chars |
| 527 */ | 533 */ |
| 528 | 534 |
| 529 if (*(ctxt->input->cur) == '\n') { | 535 if (*(ctxt->input->cur) == '\n') { |
| 530 ctxt->input->line++; ctxt->input->col = 1; | 536 ctxt->input->line++; ctxt->input->col = 1; |
| 531 } else | 537 } else |
| (...skipping 1610 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2142 xmlKeepBlanksDefault(int val) { | 2148 xmlKeepBlanksDefault(int val) { |
| 2143 int old = xmlKeepBlanksDefaultValue; | 2149 int old = xmlKeepBlanksDefaultValue; |
| 2144 | 2150 |
| 2145 xmlKeepBlanksDefaultValue = val; | 2151 xmlKeepBlanksDefaultValue = val; |
| 2146 if (!val) xmlIndentTreeOutput = 1; | 2152 if (!val) xmlIndentTreeOutput = 1; |
| 2147 return(old); | 2153 return(old); |
| 2148 } | 2154 } |
| 2149 | 2155 |
| 2150 #define bottom_parserInternals | 2156 #define bottom_parserInternals |
| 2151 #include "elfgcchack.h" | 2157 #include "elfgcchack.h" |
| OLD | NEW |