OLD | NEW |
1 /* | 1 /* |
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the | 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the |
3 * XML and HTML parsers. | 3 * XML and HTML parsers. |
4 * | 4 * |
5 * See Copyright for the status of this software. | 5 * See Copyright for the status of this software. |
6 * | 6 * |
7 * daniel@veillard.com | 7 * daniel@veillard.com |
8 */ | 8 */ |
9 | 9 |
10 #define IN_LIBXML | 10 #define IN_LIBXML |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 #include <libxml/xmlIO.h> | 48 #include <libxml/xmlIO.h> |
49 #include <libxml/uri.h> | 49 #include <libxml/uri.h> |
50 #include <libxml/dict.h> | 50 #include <libxml/dict.h> |
51 #include <libxml/SAX.h> | 51 #include <libxml/SAX.h> |
52 #ifdef LIBXML_CATALOG_ENABLED | 52 #ifdef LIBXML_CATALOG_ENABLED |
53 #include <libxml/catalog.h> | 53 #include <libxml/catalog.h> |
54 #endif | 54 #endif |
55 #include <libxml/globals.h> | 55 #include <libxml/globals.h> |
56 #include <libxml/chvalid.h> | 56 #include <libxml/chvalid.h> |
57 | 57 |
/*
 * Accessors for the current input of a parser context.  The argument and
 * each expansion are fully parenthesized so the macros stay correct when
 * given a non-trivial expression or when used inside a larger expression.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read cursor has not moved past the end of the input. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
| 61 |
58 #include "buf.h" | 62 #include "buf.h" |
59 #include "enc.h" | 63 #include "enc.h" |
60 | 64 |
61 /* | 65 /* |
62 * Various global defaults for parsing | 66 * Various global defaults for parsing |
63 */ | 67 */ |
64 | 68 |
65 /** | 69 /** |
66 * xmlCheckVersion: | 70 * xmlCheckVersion: |
67 * @version: the include version number | 71 * @version: the include version number |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
158 } | 162 } |
159 | 163 |
160 /** | 164 /** |
161 * xmlErrInternal: | 165 * xmlErrInternal: |
162 * @ctxt: an XML parser context | 166 * @ctxt: an XML parser context |
163 * @msg: the error message | 167 * @msg: the error message |
164 * @str: error informations | 168 * @str: error informations |
165 * | 169 * |
166 * Handle an internal error | 170 * Handle an internal error |
167 */ | 171 */ |
168 static void | 172 static void LIBXML_ATTR_FORMAT(2,0) |
169 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) | 173 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) |
170 { | 174 { |
171 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && | 175 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
172 (ctxt->instate == XML_PARSER_EOF)) | 176 (ctxt->instate == XML_PARSER_EOF)) |
173 return; | 177 return; |
174 if (ctxt != NULL) | 178 if (ctxt != NULL) |
175 ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 179 ctxt->errNo = XML_ERR_INTERNAL_ERROR; |
176 __xmlRaiseError(NULL, NULL, NULL, | 180 __xmlRaiseError(NULL, NULL, NULL, |
177 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, | 181 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, |
178 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, | 182 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, |
179 0, 0, msg, str); | 183 0, 0, msg, str); |
180 if (ctxt != NULL) { | 184 if (ctxt != NULL) { |
181 ctxt->wellFormed = 0; | 185 ctxt->wellFormed = 0; |
182 if (ctxt->recovery == 0) | 186 if (ctxt->recovery == 0) |
183 ctxt->disableSAX = 1; | 187 ctxt->disableSAX = 1; |
184 } | 188 } |
185 } | 189 } |
186 | 190 |
187 /** | 191 /** |
188 * xmlErrEncodingInt: | 192 * xmlErrEncodingInt: |
189 * @ctxt: an XML parser context | 193 * @ctxt: an XML parser context |
190 * @error: the error number | 194 * @error: the error number |
191 * @msg: the error message | 195 * @msg: the error message |
192 * @val: an integer value | 196 * @val: an integer value |
193 * | 197 * |
194 * An encoding error | 198 * An encoding error |
195 */ | 199 */ |
196 static void | 200 static void LIBXML_ATTR_FORMAT(3,0) |
197 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, | 201 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
198 const char *msg, int val) | 202 const char *msg, int val) |
199 { | 203 { |
200 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && | 204 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
201 (ctxt->instate == XML_PARSER_EOF)) | 205 (ctxt->instate == XML_PARSER_EOF)) |
202 return; | 206 return; |
203 if (ctxt != NULL) | 207 if (ctxt != NULL) |
204 ctxt->errNo = error; | 208 ctxt->errNo = error; |
205 __xmlRaiseError(NULL, NULL, NULL, | 209 __xmlRaiseError(NULL, NULL, NULL, |
206 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, | 210 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
287 * @len: an indicative size for the lookahead | 291 * @len: an indicative size for the lookahead |
288 * | 292 * |
289 * This function increases the input for the parser. It tries to | 293 * This function increases the input for the parser. It tries to |
290 * preserve pointers to the input buffer, and keep already read data | 294 * preserve pointers to the input buffer, and keep already read data |
291 * | 295 * |
292 * Returns the number of chars read, or -1 in case of error; 0 indicates the | 296 * Returns the number of chars read, or -1 in case of error; 0 indicates the |
293 * end of this entity | 297 * end of this entity |
294 */ | 298 */ |
295 int | 299 int |
296 xmlParserInputGrow(xmlParserInputPtr in, int len) { | 300 xmlParserInputGrow(xmlParserInputPtr in, int len) { |
297 size_t ret; | 301 int ret; |
298 size_t indx; | 302 size_t indx; |
299 const xmlChar *content; | 303 const xmlChar *content; |
300 | 304 |
301 if ((in == NULL) || (len < 0)) return(-1); | 305 if ((in == NULL) || (len < 0)) return(-1); |
302 #ifdef DEBUG_INPUT | 306 #ifdef DEBUG_INPUT |
303 xmlGenericError(xmlGenericErrorContext, "Grow\n"); | 307 xmlGenericError(xmlGenericErrorContext, "Grow\n"); |
304 #endif | 308 #endif |
305 if (in->buf == NULL) return(-1); | 309 if (in->buf == NULL) return(-1); |
306 if (in->base == NULL) return(-1); | 310 if (in->base == NULL) return(-1); |
307 if (in->cur == NULL) return(-1); | 311 if (in->cur == NULL) return(-1); |
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
415 * Skip to the next input char. | 419 * Skip to the next input char. |
416 */ | 420 */ |
417 | 421 |
418 void | 422 void |
419 xmlNextChar(xmlParserCtxtPtr ctxt) | 423 xmlNextChar(xmlParserCtxtPtr ctxt) |
420 { | 424 { |
421 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || | 425 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || |
422 (ctxt->input == NULL)) | 426 (ctxt->input == NULL)) |
423 return; | 427 return; |
424 | 428 |
| 429 if (!(VALID_CTXT(ctxt))) { |
| 430 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); |
| 431 ctxt->errNo = XML_ERR_INTERNAL_ERROR; |
| 432 xmlStopParser(ctxt); |
| 433 return; |
| 434 } |
| 435 |
| 436 if ((*ctxt->input->cur == 0) && |
| 437 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { |
| 438 if ((ctxt->instate != XML_PARSER_COMMENT)) |
| 439 xmlPopInput(ctxt); |
| 440 return; |
| 441 } |
| 442 |
425 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 443 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
426 if ((*ctxt->input->cur == 0) && | 444 const unsigned char *cur; |
427 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && | 445 unsigned char c; |
428 (ctxt->instate != XML_PARSER_COMMENT)) { | |
429 /* | |
430 * If we are at the end of the current entity and | |
431 * the context allows it, we pop consumed entities | |
432 * automatically. | |
433 * the auto closing should be blocked in other cases | |
434 */ | |
435 xmlPopInput(ctxt); | |
436 } else { | |
437 const unsigned char *cur; | |
438 unsigned char c; | |
439 | 446 |
440 /* | 447 /* |
441 * 2.11 End-of-Line Handling | 448 * 2.11 End-of-Line Handling |
442 * the literal two-character sequence "#xD#xA" or a standalone | 449 * the literal two-character sequence "#xD#xA" or a standalone |
443 * literal #xD, an XML processor must pass to the application | 450 * literal #xD, an XML processor must pass to the application |
444 * the single character #xA. | 451 * the single character #xA. |
445 */ | 452 */ |
446 if (*(ctxt->input->cur) == '\n') { | 453 if (*(ctxt->input->cur) == '\n') { |
447 ctxt->input->line++; ctxt->input->col = 1; | 454 ctxt->input->line++; ctxt->input->col = 1; |
448 } else | 455 } else |
449 ctxt->input->col++; | 456 ctxt->input->col++; |
450 | 457 |
451 /* | 458 /* |
452 * We are supposed to handle UTF8, check it's valid | 459 * We are supposed to handle UTF8, check it's valid |
453 * From rfc2044: encoding of the Unicode values on UTF-8: | 460 * From rfc2044: encoding of the Unicode values on UTF-8: |
454 * | 461 * |
455 * UCS-4 range (hex.) UTF-8 octet sequence (binary) | 462 * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
456 * 0000 0000-0000 007F 0xxxxxxx | 463 * 0000 0000-0000 007F 0xxxxxxx |
457 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx | 464 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
458 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx | 465 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
459 * | 466 * |
460 * Check for the 0x110000 limit too | 467 * Check for the 0x110000 limit too |
461 */ | 468 */ |
462 cur = ctxt->input->cur; | 469 cur = ctxt->input->cur; |
463 | 470 |
464 c = *cur; | 471 c = *cur; |
465 if (c & 0x80) { | 472 if (c & 0x80) { |
466 » if (c == 0xC0) | 473 if (c == 0xC0) |
467 » » goto encoding_error; | 474 » goto encoding_error; |
468 if (cur[1] == 0) { | 475 if (cur[1] == 0) { |
| 476 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| 477 cur = ctxt->input->cur; |
| 478 } |
| 479 if ((cur[1] & 0xc0) != 0x80) |
| 480 goto encoding_error; |
| 481 if ((c & 0xe0) == 0xe0) { |
| 482 unsigned int val; |
| 483 |
| 484 if (cur[2] == 0) { |
469 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
470 cur = ctxt->input->cur; | 486 cur = ctxt->input->cur; |
471 } | 487 } |
472 if ((cur[1] & 0xc0) != 0x80) | 488 if ((cur[2] & 0xc0) != 0x80) |
473 goto encoding_error; | 489 goto encoding_error; |
474 if ((c & 0xe0) == 0xe0) { | 490 if ((c & 0xf0) == 0xf0) { |
475 unsigned int val; | 491 if (cur[3] == 0) { |
476 | |
477 if (cur[2] == 0) { | |
478 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 492 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
479 cur = ctxt->input->cur; | 493 cur = ctxt->input->cur; |
480 } | 494 } |
481 if ((cur[2] & 0xc0) != 0x80) | 495 if (((c & 0xf8) != 0xf0) || |
| 496 ((cur[3] & 0xc0) != 0x80)) |
482 goto encoding_error; | 497 goto encoding_error; |
483 if ((c & 0xf0) == 0xf0) { | 498 /* 4-byte code */ |
484 if (cur[3] == 0) { | 499 ctxt->input->cur += 4; |
485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 500 val = (cur[0] & 0x7) << 18; |
486 cur = ctxt->input->cur; | 501 val |= (cur[1] & 0x3f) << 12; |
487 } | 502 val |= (cur[2] & 0x3f) << 6; |
488 if (((c & 0xf8) != 0xf0) || | 503 val |= cur[3] & 0x3f; |
489 ((cur[3] & 0xc0) != 0x80)) | 504 } else { |
490 goto encoding_error; | 505 /* 3-byte code */ |
491 /* 4-byte code */ | 506 ctxt->input->cur += 3; |
492 ctxt->input->cur += 4; | 507 val = (cur[0] & 0xf) << 12; |
493 val = (cur[0] & 0x7) << 18; | 508 val |= (cur[1] & 0x3f) << 6; |
494 val |= (cur[1] & 0x3f) << 12; | 509 val |= cur[2] & 0x3f; |
495 val |= (cur[2] & 0x3f) << 6; | 510 } |
496 val |= cur[3] & 0x3f; | 511 if (((val > 0xd7ff) && (val < 0xe000)) || |
497 } else { | 512 ((val > 0xfffd) && (val < 0x10000)) || |
498 /* 3-byte code */ | 513 (val >= 0x110000)) { |
499 ctxt->input->cur += 3; | 514 » » xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, |
500 val = (cur[0] & 0xf) << 12; | 515 » » » » "Char 0x%X out of allowed range\n", |
501 val |= (cur[1] & 0x3f) << 6; | 516 » » » » val); |
502 val |= cur[2] & 0x3f; | 517 } |
503 } | |
504 if (((val > 0xd7ff) && (val < 0xe000)) || | |
505 ((val > 0xfffd) && (val < 0x10000)) || | |
506 (val >= 0x110000)) { | |
507 » » » xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, | |
508 » » » » » "Char 0x%X out of allowed range\n", | |
509 » » » » » val); | |
510 } | |
511 } else | |
512 /* 2-byte code */ | |
513 ctxt->input->cur += 2; | |
514 } else | 518 } else |
515 /* 1-byte code */ | 519 /* 2-byte code */ |
516 ctxt->input->cur++; | 520 ctxt->input->cur += 2; |
| 521 } else |
| 522 /* 1-byte code */ |
| 523 ctxt->input->cur++; |
517 | 524 |
518 ctxt->nbChars++; | 525 ctxt->nbChars++; |
519 if (*ctxt->input->cur == 0) | 526 if (*ctxt->input->cur == 0) |
520 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 527 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
521 } | |
522 } else { | 528 } else { |
523 /* | 529 /* |
524 * Assume it's a fixed length encoding (1) with | 530 * Assume it's a fixed length encoding (1) with |
525 * a compatible encoding for the ASCII set, since | 531 * a compatible encoding for the ASCII set, since |
526 * XML constructs only use < 128 chars | 532 * XML constructs only use < 128 chars |
527 */ | 533 */ |
528 | 534 |
529 if (*(ctxt->input->cur) == '\n') { | 535 if (*(ctxt->input->cur) == '\n') { |
530 ctxt->input->line++; ctxt->input->col = 1; | 536 ctxt->input->line++; ctxt->input->col = 1; |
531 } else | 537 } else |
(...skipping 1610 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2142 xmlKeepBlanksDefault(int val) { | 2148 xmlKeepBlanksDefault(int val) { |
2143 int old = xmlKeepBlanksDefaultValue; | 2149 int old = xmlKeepBlanksDefaultValue; |
2144 | 2150 |
2145 xmlKeepBlanksDefaultValue = val; | 2151 xmlKeepBlanksDefaultValue = val; |
2146 if (!val) xmlIndentTreeOutput = 1; | 2152 if (!val) xmlIndentTreeOutput = 1; |
2147 return(old); | 2153 return(old); |
2148 } | 2154 } |
2149 | 2155 |
2150 #define bottom_parserInternals | 2156 #define bottom_parserInternals |
2151 #include "elfgcchack.h" | 2157 #include "elfgcchack.h" |
OLD | NEW |