Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(219)

Side by Side Diff: third_party/libxml/src/parserInternals.c

Issue 2010803004: Roll libxml to bdec2183f34b37ee89ae1d330c6ad2bb4d76605f (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update README.chromium Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libxml/src/parser.c ('k') | third_party/libxml/src/relaxng.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers. 3 * XML and HTML parsers.
4 * 4 *
5 * See Copyright for the status of this software. 5 * See Copyright for the status of this software.
6 * 6 *
7 * daniel@veillard.com 7 * daniel@veillard.com
8 */ 8 */
9 9
10 #define IN_LIBXML 10 #define IN_LIBXML
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 #include <libxml/xmlIO.h> 48 #include <libxml/xmlIO.h>
49 #include <libxml/uri.h> 49 #include <libxml/uri.h>
50 #include <libxml/dict.h> 50 #include <libxml/dict.h>
51 #include <libxml/SAX.h> 51 #include <libxml/SAX.h>
52 #ifdef LIBXML_CATALOG_ENABLED 52 #ifdef LIBXML_CATALOG_ENABLED
53 #include <libxml/catalog.h> 53 #include <libxml/catalog.h>
54 #endif 54 #endif
55 #include <libxml/globals.h> 55 #include <libxml/globals.h>
56 #include <libxml/chvalid.h> 56 #include <libxml/chvalid.h>
57 57
58 #define CUR(ctxt) ctxt->input->cur
59 #define END(ctxt) ctxt->input->end
60 #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
61
58 #include "buf.h" 62 #include "buf.h"
59 #include "enc.h" 63 #include "enc.h"
60 64
61 /* 65 /*
62 * Various global defaults for parsing 66 * Various global defaults for parsing
63 */ 67 */
64 68
65 /** 69 /**
66 * xmlCheckVersion: 70 * xmlCheckVersion:
67 * @version: the include version number 71 * @version: the include version number
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
158 } 162 }
159 163
160 /** 164 /**
161 * xmlErrInternal: 165 * xmlErrInternal:
162 * @ctxt: an XML parser context 166 * @ctxt: an XML parser context
163 * @msg: the error message 167 * @msg: the error message
164 * @str: error information 168 * @str: error information
165 * 169 *
166 * Handle an internal error 170 * Handle an internal error
167 */ 171 */
168 static void 172 static void LIBXML_ATTR_FORMAT(2,0)
169 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 173 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
170 { 174 {
171 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 175 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
172 (ctxt->instate == XML_PARSER_EOF)) 176 (ctxt->instate == XML_PARSER_EOF))
173 return; 177 return;
174 if (ctxt != NULL) 178 if (ctxt != NULL)
175 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 179 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
176 __xmlRaiseError(NULL, NULL, NULL, 180 __xmlRaiseError(NULL, NULL, NULL,
177 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 181 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
178 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 182 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
179 0, 0, msg, str); 183 0, 0, msg, str);
180 if (ctxt != NULL) { 184 if (ctxt != NULL) {
181 ctxt->wellFormed = 0; 185 ctxt->wellFormed = 0;
182 if (ctxt->recovery == 0) 186 if (ctxt->recovery == 0)
183 ctxt->disableSAX = 1; 187 ctxt->disableSAX = 1;
184 } 188 }
185 } 189 }
186 190
187 /** 191 /**
188 * xmlErrEncodingInt: 192 * xmlErrEncodingInt:
189 * @ctxt: an XML parser context 193 * @ctxt: an XML parser context
190 * @error: the error number 194 * @error: the error number
191 * @msg: the error message 195 * @msg: the error message
192 * @val: an integer value 196 * @val: an integer value
193 * 197 *
194 * An encoding error 198 * An encoding error
195 */ 199 */
196 static void 200 static void LIBXML_ATTR_FORMAT(3,0)
197 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 201 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
198 const char *msg, int val) 202 const char *msg, int val)
199 { 203 {
200 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 204 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
201 (ctxt->instate == XML_PARSER_EOF)) 205 (ctxt->instate == XML_PARSER_EOF))
202 return; 206 return;
203 if (ctxt != NULL) 207 if (ctxt != NULL)
204 ctxt->errNo = error; 208 ctxt->errNo = error;
205 __xmlRaiseError(NULL, NULL, NULL, 209 __xmlRaiseError(NULL, NULL, NULL,
206 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 210 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
287 * @len: an indicative size for the lookahead 291 * @len: an indicative size for the lookahead
288 * 292 *
289 * This function increases the input for the parser. It tries to 293 * This function increases the input for the parser. It tries to
290 * preserve pointers to the input buffer, and keep already read data 294 * preserve pointers to the input buffer, and keep already read data
291 * 295 *
292 * Returns the number of chars read, or -1 in case of error; 0 indicates the 296 * Returns the number of chars read, or -1 in case of error; 0 indicates the
293 * end of this entity 297 * end of this entity
294 */ 298 */
295 int 299 int
296 xmlParserInputGrow(xmlParserInputPtr in, int len) { 300 xmlParserInputGrow(xmlParserInputPtr in, int len) {
297 size_t ret; 301 int ret;
298 size_t indx; 302 size_t indx;
299 const xmlChar *content; 303 const xmlChar *content;
300 304
301 if ((in == NULL) || (len < 0)) return(-1); 305 if ((in == NULL) || (len < 0)) return(-1);
302 #ifdef DEBUG_INPUT 306 #ifdef DEBUG_INPUT
303 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 307 xmlGenericError(xmlGenericErrorContext, "Grow\n");
304 #endif 308 #endif
305 if (in->buf == NULL) return(-1); 309 if (in->buf == NULL) return(-1);
306 if (in->base == NULL) return(-1); 310 if (in->base == NULL) return(-1);
307 if (in->cur == NULL) return(-1); 311 if (in->cur == NULL) return(-1);
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 * Skip to the next input char. 419 * Skip to the next input char.
416 */ 420 */
417 421
418 void 422 void
419 xmlNextChar(xmlParserCtxtPtr ctxt) 423 xmlNextChar(xmlParserCtxtPtr ctxt)
420 { 424 {
421 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 425 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
422 (ctxt->input == NULL)) 426 (ctxt->input == NULL))
423 return; 427 return;
424 428
429 if (!(VALID_CTXT(ctxt))) {
430 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
431 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
432 xmlStopParser(ctxt);
433 return;
434 }
435
436 if ((*ctxt->input->cur == 0) &&
437 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
438 if ((ctxt->instate != XML_PARSER_COMMENT))
439 xmlPopInput(ctxt);
440 return;
441 }
442
425 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 443 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
426 if ((*ctxt->input->cur == 0) && 444 const unsigned char *cur;
427 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 445 unsigned char c;
428 (ctxt->instate != XML_PARSER_COMMENT)) {
429 /*
430 * If we are at the end of the current entity and
431 * the context allows it, we pop consumed entities
432 * automatically.
433 * the auto closing should be blocked in other cases
434 */
435 xmlPopInput(ctxt);
436 } else {
437 const unsigned char *cur;
438 unsigned char c;
439 446
440 /* 447 /*
441 * 2.11 End-of-Line Handling 448 * 2.11 End-of-Line Handling
442 * the literal two-character sequence "#xD#xA" or a standalone 449 * the literal two-character sequence "#xD#xA" or a standalone
443 * literal #xD, an XML processor must pass to the application 450 * literal #xD, an XML processor must pass to the application
444 * the single character #xA. 451 * the single character #xA.
445 */ 452 */
446 if (*(ctxt->input->cur) == '\n') { 453 if (*(ctxt->input->cur) == '\n') {
447 ctxt->input->line++; ctxt->input->col = 1; 454 ctxt->input->line++; ctxt->input->col = 1;
448 } else 455 } else
449 ctxt->input->col++; 456 ctxt->input->col++;
450 457
451 /* 458 /*
452 * We are supposed to handle UTF8, check it's valid 459 * We are supposed to handle UTF8, check it's valid
453 * From rfc2044: encoding of the Unicode values on UTF-8: 460 * From rfc2044: encoding of the Unicode values on UTF-8:
454 * 461 *
455 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 462 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
456 * 0000 0000-0000 007F 0xxxxxxx 463 * 0000 0000-0000 007F 0xxxxxxx
457 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 464 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
458 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 465 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
459 * 466 *
460 * Check for the 0x110000 limit too 467 * Check for the 0x110000 limit too
461 */ 468 */
462 cur = ctxt->input->cur; 469 cur = ctxt->input->cur;
463 470
464 c = *cur; 471 c = *cur;
465 if (c & 0x80) { 472 if (c & 0x80) {
466 » if (c == 0xC0) 473 if (c == 0xC0)
467 » » goto encoding_error; 474 » goto encoding_error;
468 if (cur[1] == 0) { 475 if (cur[1] == 0) {
476 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
477 cur = ctxt->input->cur;
478 }
479 if ((cur[1] & 0xc0) != 0x80)
480 goto encoding_error;
481 if ((c & 0xe0) == 0xe0) {
482 unsigned int val;
483
484 if (cur[2] == 0) {
469 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
470 cur = ctxt->input->cur; 486 cur = ctxt->input->cur;
471 } 487 }
472 if ((cur[1] & 0xc0) != 0x80) 488 if ((cur[2] & 0xc0) != 0x80)
473 goto encoding_error; 489 goto encoding_error;
474 if ((c & 0xe0) == 0xe0) { 490 if ((c & 0xf0) == 0xf0) {
475 unsigned int val; 491 if (cur[3] == 0) {
476
477 if (cur[2] == 0) {
478 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 492 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
479 cur = ctxt->input->cur; 493 cur = ctxt->input->cur;
480 } 494 }
481 if ((cur[2] & 0xc0) != 0x80) 495 if (((c & 0xf8) != 0xf0) ||
496 ((cur[3] & 0xc0) != 0x80))
482 goto encoding_error; 497 goto encoding_error;
483 if ((c & 0xf0) == 0xf0) { 498 /* 4-byte code */
484 if (cur[3] == 0) { 499 ctxt->input->cur += 4;
485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 500 val = (cur[0] & 0x7) << 18;
486 cur = ctxt->input->cur; 501 val |= (cur[1] & 0x3f) << 12;
487 } 502 val |= (cur[2] & 0x3f) << 6;
488 if (((c & 0xf8) != 0xf0) || 503 val |= cur[3] & 0x3f;
489 ((cur[3] & 0xc0) != 0x80)) 504 } else {
490 goto encoding_error; 505 /* 3-byte code */
491 /* 4-byte code */ 506 ctxt->input->cur += 3;
492 ctxt->input->cur += 4; 507 val = (cur[0] & 0xf) << 12;
493 val = (cur[0] & 0x7) << 18; 508 val |= (cur[1] & 0x3f) << 6;
494 val |= (cur[1] & 0x3f) << 12; 509 val |= cur[2] & 0x3f;
495 val |= (cur[2] & 0x3f) << 6; 510 }
496 val |= cur[3] & 0x3f; 511 if (((val > 0xd7ff) && (val < 0xe000)) ||
497 } else { 512 ((val > 0xfffd) && (val < 0x10000)) ||
498 /* 3-byte code */ 513 (val >= 0x110000)) {
499 ctxt->input->cur += 3; 514 » » xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
500 val = (cur[0] & 0xf) << 12; 515 » » » » "Char 0x%X out of allowed range\n",
501 val |= (cur[1] & 0x3f) << 6; 516 » » » » val);
502 val |= cur[2] & 0x3f; 517 }
503 }
504 if (((val > 0xd7ff) && (val < 0xe000)) ||
505 ((val > 0xfffd) && (val < 0x10000)) ||
506 (val >= 0x110000)) {
507 » » » xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
508 » » » » » "Char 0x%X out of allowed range\n",
509 » » » » » val);
510 }
511 } else
512 /* 2-byte code */
513 ctxt->input->cur += 2;
514 } else 518 } else
515 /* 1-byte code */ 519 /* 2-byte code */
516 ctxt->input->cur++; 520 ctxt->input->cur += 2;
521 } else
522 /* 1-byte code */
523 ctxt->input->cur++;
517 524
518 ctxt->nbChars++; 525 ctxt->nbChars++;
519 if (*ctxt->input->cur == 0) 526 if (*ctxt->input->cur == 0)
520 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 527 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
521 }
522 } else { 528 } else {
523 /* 529 /*
524 * Assume it's a fixed length encoding (1) with 530 * Assume it's a fixed length encoding (1) with
525 * a compatible encoding for the ASCII set, since 531 * a compatible encoding for the ASCII set, since
526 * XML constructs only use < 128 chars 532 * XML constructs only use < 128 chars
527 */ 533 */
528 534
529 if (*(ctxt->input->cur) == '\n') { 535 if (*(ctxt->input->cur) == '\n') {
530 ctxt->input->line++; ctxt->input->col = 1; 536 ctxt->input->line++; ctxt->input->col = 1;
531 } else 537 } else
(...skipping 1610 matching lines...) Expand 10 before | Expand all | Expand 10 after
2142 xmlKeepBlanksDefault(int val) { 2148 xmlKeepBlanksDefault(int val) {
2143 int old = xmlKeepBlanksDefaultValue; 2149 int old = xmlKeepBlanksDefaultValue;
2144 2150
2145 xmlKeepBlanksDefaultValue = val; 2151 xmlKeepBlanksDefaultValue = val;
2146 if (!val) xmlIndentTreeOutput = 1; 2152 if (!val) xmlIndentTreeOutput = 1;
2147 return(old); 2153 return(old);
2148 } 2154 }
2149 2155
2150 #define bottom_parserInternals 2156 #define bottom_parserInternals
2151 #include "elfgcchack.h" 2157 #include "elfgcchack.h"
OLDNEW
« no previous file with comments | « third_party/libxml/src/parser.c ('k') | third_party/libxml/src/relaxng.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698