Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: third_party/libxml/src/parserInternals.c

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: no iconv Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libxml/src/parser.c ('k') | third_party/libxml/src/pattern.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers. 3 * XML and HTML parsers.
4 * 4 *
5 * See Copyright for the status of this software. 5 * See Copyright for the status of this software.
6 * 6 *
7 * daniel@veillard.com 7 * daniel@veillard.com
8 */ 8 */
9 9
10 #define IN_LIBXML 10 #define IN_LIBXML
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 #include <libxml/xmlIO.h> 48 #include <libxml/xmlIO.h>
49 #include <libxml/uri.h> 49 #include <libxml/uri.h>
50 #include <libxml/dict.h> 50 #include <libxml/dict.h>
51 #include <libxml/SAX.h> 51 #include <libxml/SAX.h>
52 #ifdef LIBXML_CATALOG_ENABLED 52 #ifdef LIBXML_CATALOG_ENABLED
53 #include <libxml/catalog.h> 53 #include <libxml/catalog.h>
54 #endif 54 #endif
55 #include <libxml/globals.h> 55 #include <libxml/globals.h>
56 #include <libxml/chvalid.h> 56 #include <libxml/chvalid.h>
57 57
58 #include "buf.h"
59 #include "enc.h"
60
58 /* 61 /*
59 * Various global defaults for parsing 62 * Various global defaults for parsing
60 */ 63 */
61 64
62 /** 65 /**
63 * xmlCheckVersion: 66 * xmlCheckVersion:
64 * @version: the include version number 67 * @version: the include version number
65 * 68 *
66 * check the compiled lib version against the include one. 69 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application 70 * This can warn or immediately kill the application
68 */ 71 */
69 void 72 void
70 xmlCheckVersion(int version) { 73 xmlCheckVersion(int version) {
71 int myversion = (int) LIBXML_VERSION; 74 int myversion = (int) LIBXML_VERSION;
72 75
73 xmlInitParser(); 76 xmlInitParser();
74 77
75 if ((myversion / 10000) != (version / 10000)) { 78 if ((myversion / 10000) != (version / 10000)) {
76 » xmlGenericError(xmlGenericErrorContext, 79 » xmlGenericError(xmlGenericErrorContext,
77 "Fatal: program compiled against libxml %d using libxml %d\n", 80 "Fatal: program compiled against libxml %d using libxml %d\n",
78 (version / 10000), (myversion / 10000)); 81 (version / 10000), (myversion / 10000));
79 » fprintf(stderr, 82 » fprintf(stderr,
80 "Fatal: program compiled against libxml %d using libxml %d\n", 83 "Fatal: program compiled against libxml %d using libxml %d\n",
81 (version / 10000), (myversion / 10000)); 84 (version / 10000), (myversion / 10000));
82 } 85 }
83 if ((myversion / 100) < (version / 100)) { 86 if ((myversion / 100) < (version / 100)) {
84 » xmlGenericError(xmlGenericErrorContext, 87 » xmlGenericError(xmlGenericErrorContext,
85 "Warning: program compiled against libxml %d using older %d\n", 88 "Warning: program compiled against libxml %d using older %d\n",
86 (version / 100), (myversion / 100)); 89 (version / 100), (myversion / 100));
87 } 90 }
88 } 91 }
89 92
90 93
91 /************************************************************************ 94 /************************************************************************
92 * * 95 * *
93 * » » Some factorized error routines» » » » * 96 *» » Some factorized error routines» » » » *
94 * * 97 * *
95 ************************************************************************/ 98 ************************************************************************/
96 99
97 100
98 /** 101 /**
99 * xmlErrMemory: 102 * xmlErrMemory:
100 * @ctxt: an XML parser context 103 * @ctxt: an XML parser context
101 * @extra: extra informations 104 * @extra: extra informations
102 * 105 *
103 * Handle a redefinition of attribute error 106 * Handle a redefinition of attribute error
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
218 * 221 *
219 * Returns 0 if not, non-zero otherwise 222 * Returns 0 if not, non-zero otherwise
220 */ 223 */
221 int 224 int
222 xmlIsLetter(int c) { 225 xmlIsLetter(int c) {
223 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 226 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
224 } 227 }
225 228
226 /************************************************************************ 229 /************************************************************************
227 * * 230 * *
228 * » » Input handling functions for progressive parsing» * 231 *» » Input handling functions for progressive parsing» *
229 * * 232 * *
230 ************************************************************************/ 233 ************************************************************************/
231 234
232 /* #define DEBUG_INPUT */ 235 /* #define DEBUG_INPUT */
233 /* #define DEBUG_STACK */ 236 /* #define DEBUG_STACK */
234 /* #define DEBUG_PUSH */ 237 /* #define DEBUG_PUSH */
235 238
236 239
237 /* we need to keep enough input to show errors in context */ 240 /* we need to keep enough input to show errors in context */
238 #define LINE_LEN 80 241 #define LINE_LEN 80
239 242
240 #ifdef DEBUG_INPUT 243 #ifdef DEBUG_INPUT
241 #define CHECK_BUFFER(in) check_buffer(in) 244 #define CHECK_BUFFER(in) check_buffer(in)
242 245
243 static 246 static
244 void check_buffer(xmlParserInputPtr in) { 247 void check_buffer(xmlParserInputPtr in) {
245 if (in->base != in->buf->buffer->content) { 248 if (in->base != xmlBufContent(in->buf->buffer)) {
246 xmlGenericError(xmlGenericErrorContext, 249 xmlGenericError(xmlGenericErrorContext,
247 "xmlParserInput: base mismatch problem\n"); 250 "xmlParserInput: base mismatch problem\n");
248 } 251 }
249 if (in->cur < in->base) { 252 if (in->cur < in->base) {
250 xmlGenericError(xmlGenericErrorContext, 253 xmlGenericError(xmlGenericErrorContext,
251 "xmlParserInput: cur < base problem\n"); 254 "xmlParserInput: cur < base problem\n");
252 } 255 }
253 if (in->cur > in->base + in->buf->buffer->use) { 256 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
254 xmlGenericError(xmlGenericErrorContext, 257 xmlGenericError(xmlGenericErrorContext,
255 "xmlParserInput: cur > base + use problem\n"); 258 "xmlParserInput: cur > base + use problem\n");
256 } 259 }
257 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", 260 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
258 (int) in, (int) in->buf->buffer->content, in->cur - in->base, 261 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
259 » in->buf->buffer->use, in->buf->buffer->size); 262 » xmlBufUse(in->buf->buffer));
260 } 263 }
261 264
262 #else 265 #else
263 #define CHECK_BUFFER(in) 266 #define CHECK_BUFFER(in)
264 #endif 267 #endif
265 268
266 269
267 /** 270 /**
268 * xmlParserInputRead: 271 * xmlParserInputRead:
269 * @in: an XML parser input 272 * @in: an XML parser input
270 * @len: an indicative size for the lookahead 273 * @len: an indicative size for the lookahead
271 * 274 *
272 * This function refresh the input for the parser. It doesn't try to 275 * This function was internal and is deprecated.
273 * preserve pointers to the input buffer, and discard already read data
274 * 276 *
275 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 277 * Returns -1 as this is an error to use it.
276 * end of this entity
277 */ 278 */
278 int 279 int
279 xmlParserInputRead(xmlParserInputPtr in, int len) { 280 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
280 int ret; 281 return(-1);
281 int used;
282 int indx;
283
284 if (in == NULL) return(-1);
285 #ifdef DEBUG_INPUT
286 xmlGenericError(xmlGenericErrorContext, "Read\n");
287 #endif
288 if (in->buf == NULL) return(-1);
289 if (in->base == NULL) return(-1);
290 if (in->cur == NULL) return(-1);
291 if (in->buf->buffer == NULL) return(-1);
292 if (in->buf->readcallback == NULL) return(-1);
293
294 CHECK_BUFFER(in);
295
296 used = in->cur - in->buf->buffer->content;
297 ret = xmlBufferShrink(in->buf->buffer, used);
298 if (ret > 0) {
299 » in->cur -= ret;
300 » in->consumed += ret;
301 }
302 ret = xmlParserInputBufferRead(in->buf, len);
303 if (in->base != in->buf->buffer->content) {
304 /*
305 » * the buffer has been reallocated
306 » */
307 » indx = in->cur - in->base;
308 » in->base = in->buf->buffer->content;
309 » in->cur = &in->buf->buffer->content[indx];
310 }
311 in->end = &in->buf->buffer->content[in->buf->buffer->use];
312
313 CHECK_BUFFER(in);
314
315 return(ret);
316 } 282 }
317 283
318 /** 284 /**
319 * xmlParserInputGrow: 285 * xmlParserInputGrow:
320 * @in: an XML parser input 286 * @in: an XML parser input
321 * @len: an indicative size for the lookahead 287 * @len: an indicative size for the lookahead
322 * 288 *
323 * This function increase the input for the parser. It tries to 289 * This function increase the input for the parser. It tries to
324 * preserve pointers to the input buffer, and keep already read data 290 * preserve pointers to the input buffer, and keep already read data
325 * 291 *
326 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 292 * Returns the amount of char read, or -1 in case of error, 0 indicate the
327 * end of this entity 293 * end of this entity
328 */ 294 */
329 int 295 int
330 xmlParserInputGrow(xmlParserInputPtr in, int len) { 296 xmlParserInputGrow(xmlParserInputPtr in, int len) {
331 int ret; 297 size_t ret;
332 int indx; 298 size_t indx;
299 const xmlChar *content;
333 300
334 if (in == NULL) return(-1); 301 if ((in == NULL) || (len < 0)) return(-1);
335 #ifdef DEBUG_INPUT 302 #ifdef DEBUG_INPUT
336 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 303 xmlGenericError(xmlGenericErrorContext, "Grow\n");
337 #endif 304 #endif
338 if (in->buf == NULL) return(-1); 305 if (in->buf == NULL) return(-1);
339 if (in->base == NULL) return(-1); 306 if (in->base == NULL) return(-1);
340 if (in->cur == NULL) return(-1); 307 if (in->cur == NULL) return(-1);
341 if (in->buf->buffer == NULL) return(-1); 308 if (in->buf->buffer == NULL) return(-1);
342 309
343 CHECK_BUFFER(in); 310 CHECK_BUFFER(in);
344 311
345 indx = in->cur - in->base; 312 indx = in->cur - in->base;
346 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { 313 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
347 314
348 CHECK_BUFFER(in); 315 CHECK_BUFFER(in);
349 316
350 return(0); 317 return(0);
351 } 318 }
352 if (in->buf->readcallback != NULL) 319 if (in->buf->readcallback != NULL) {
353 ret = xmlParserInputBufferGrow(in->buf, len); 320 ret = xmlParserInputBufferGrow(in->buf, len);
354 else» 321 } else
355 return(0); 322 return(0);
356 323
357 /* 324 /*
358 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 325 * NOTE : in->base may be a "dangling" i.e. freed pointer in this
359 * block, but we use it really as an integer to do some 326 * block, but we use it really as an integer to do some
360 * pointer arithmetic. Insure will raise it as a bug but in 327 * pointer arithmetic. Insure will raise it as a bug but in
361 * that specific case, that's not ! 328 * that specific case, that's not !
362 */ 329 */
363 if (in->base != in->buf->buffer->content) { 330
331 content = xmlBufContent(in->buf->buffer);
332 if (in->base != content) {
364 /* 333 /*
365 * the buffer has been reallocated 334 * the buffer has been reallocated
366 */ 335 */
367 indx = in->cur - in->base; 336 indx = in->cur - in->base;
368 » in->base = in->buf->buffer->content; 337 » in->base = content;
369 » in->cur = &in->buf->buffer->content[indx]; 338 » in->cur = &content[indx];
370 } 339 }
371 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 340 in->end = xmlBufEnd(in->buf->buffer);
372 341
373 CHECK_BUFFER(in); 342 CHECK_BUFFER(in);
374 343
375 return(ret); 344 return(ret);
376 } 345 }
377 346
378 /** 347 /**
379 * xmlParserInputShrink: 348 * xmlParserInputShrink:
380 * @in: an XML parser input 349 * @in: an XML parser input
381 * 350 *
382 * This function removes used input for the parser. 351 * This function removes used input for the parser.
383 */ 352 */
384 void 353 void
385 xmlParserInputShrink(xmlParserInputPtr in) { 354 xmlParserInputShrink(xmlParserInputPtr in) {
386 int used; 355 size_t used;
387 int ret; 356 size_t ret;
388 int indx; 357 size_t indx;
358 const xmlChar *content;
389 359
390 #ifdef DEBUG_INPUT 360 #ifdef DEBUG_INPUT
391 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 361 xmlGenericError(xmlGenericErrorContext, "Shrink\n");
392 #endif 362 #endif
393 if (in == NULL) return; 363 if (in == NULL) return;
394 if (in->buf == NULL) return; 364 if (in->buf == NULL) return;
395 if (in->base == NULL) return; 365 if (in->base == NULL) return;
396 if (in->cur == NULL) return; 366 if (in->cur == NULL) return;
397 if (in->buf->buffer == NULL) return; 367 if (in->buf->buffer == NULL) return;
398 368
399 CHECK_BUFFER(in); 369 CHECK_BUFFER(in);
400 370
401 used = in->cur - in->buf->buffer->content; 371 used = in->cur - xmlBufContent(in->buf->buffer);
402 /* 372 /*
403 * Do not shrink on large buffers whose only a tiny fraction 373 * Do not shrink on large buffers whose only a tiny fraction
404 * was consumed 374 * was consumed
405 */ 375 */
406 if (used > INPUT_CHUNK) { 376 if (used > INPUT_CHUNK) {
407 » ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 377 » ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
408 if (ret > 0) { 378 if (ret > 0) {
409 in->cur -= ret; 379 in->cur -= ret;
410 in->consumed += ret; 380 in->consumed += ret;
411 } 381 }
412 » in->end = &in->buf->buffer->content[in->buf->buffer->use]; 382 » in->end = xmlBufEnd(in->buf->buffer);
413 } 383 }
414 384
415 CHECK_BUFFER(in); 385 CHECK_BUFFER(in);
416 386
417 if (in->buf->buffer->use > INPUT_CHUNK) { 387 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
418 return; 388 return;
419 } 389 }
420 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 390 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
421 if (in->base != in->buf->buffer->content) { 391 content = xmlBufContent(in->buf->buffer);
392 if (in->base != content) {
422 /* 393 /*
423 * the buffer has been reallocated 394 * the buffer has been reallocated
424 */ 395 */
425 indx = in->cur - in->base; 396 indx = in->cur - in->base;
426 » in->base = in->buf->buffer->content; 397 » in->base = content;
427 » in->cur = &in->buf->buffer->content[indx]; 398 » in->cur = &content[indx];
428 } 399 }
429 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 400 in->end = xmlBufEnd(in->buf->buffer);
430 401
431 CHECK_BUFFER(in); 402 CHECK_BUFFER(in);
432 } 403 }
433 404
434 /************************************************************************ 405 /************************************************************************
435 * * 406 * *
436 * » » UTF8 character input and related functions» » * 407 *» » UTF8 character input and related functions» » *
437 * * 408 * *
438 ************************************************************************/ 409 ************************************************************************/
439 410
440 /** 411 /**
441 * xmlNextChar: 412 * xmlNextChar:
442 * @ctxt: the XML parser context 413 * @ctxt: the XML parser context
443 * 414 *
444 * Skip to the next char input char. 415 * Skip to the next char input char.
445 */ 416 */
446 417
(...skipping 30 matching lines...) Expand all
477 } else 448 } else
478 ctxt->input->col++; 449 ctxt->input->col++;
479 450
480 /* 451 /*
481 * We are supposed to handle UTF8, check it's valid 452 * We are supposed to handle UTF8, check it's valid
482 * From rfc2044: encoding of the Unicode values on UTF-8: 453 * From rfc2044: encoding of the Unicode values on UTF-8:
483 * 454 *
484 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 455 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
485 * 0000 0000-0000 007F 0xxxxxxx 456 * 0000 0000-0000 007F 0xxxxxxx
486 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 457 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
487 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 458 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
488 * 459 *
489 * Check for the 0x110000 limit too 460 * Check for the 0x110000 limit too
490 */ 461 */
491 cur = ctxt->input->cur; 462 cur = ctxt->input->cur;
492 463
493 c = *cur; 464 c = *cur;
494 if (c & 0x80) { 465 if (c & 0x80) {
495 if (c == 0xC0) 466 if (c == 0xC0)
496 goto encoding_error; 467 goto encoding_error;
497 if (cur[1] == 0) { 468 if (cur[1] == 0) {
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
627 return((int) *ctxt->input->cur); 598 return((int) *ctxt->input->cur);
628 } 599 }
629 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 600 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
630 /* 601 /*
631 * We are supposed to handle UTF8, check it's valid 602 * We are supposed to handle UTF8, check it's valid
632 * From rfc2044: encoding of the Unicode values on UTF-8: 603 * From rfc2044: encoding of the Unicode values on UTF-8:
633 * 604 *
634 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 605 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
635 * 0000 0000-0000 007F 0xxxxxxx 606 * 0000 0000-0000 007F 0xxxxxxx
636 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 607 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
637 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 608 » * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
638 * 609 *
639 * Check for the 0x110000 limit too 610 * Check for the 0x110000 limit too
640 */ 611 */
641 const unsigned char *cur = ctxt->input->cur; 612 const unsigned char *cur = ctxt->input->cur;
642 unsigned char c; 613 unsigned char c;
643 unsigned int val; 614 unsigned int val;
644 615
645 c = *cur; 616 c = *cur;
646 if (c & 0x80) { 617 if (c & 0x80) {
647 if (((c & 0x40) == 0) || (c == 0xC0)) 618 if (((c & 0x40) == 0) || (c == 0xC0))
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
688 /* 2-byte code */ 659 /* 2-byte code */
689 *len = 2; 660 *len = 2;
690 val = (cur[0] & 0x1f) << 6; 661 val = (cur[0] & 0x1f) << 6;
691 val |= cur[1] & 0x3f; 662 val |= cur[1] & 0x3f;
692 if (val < 0x80) 663 if (val < 0x80)
693 goto encoding_error; 664 goto encoding_error;
694 } 665 }
695 if (!IS_CHAR(val)) { 666 if (!IS_CHAR(val)) {
696 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 667 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
697 "Char 0x%X out of allowed range\n", val); 668 "Char 0x%X out of allowed range\n", val);
698 » } 669 » }
699 return(val); 670 return(val);
700 } else { 671 } else {
701 /* 1-byte code */ 672 /* 1-byte code */
702 *len = 1; 673 *len = 1;
703 if (*ctxt->input->cur == 0) 674 if (*ctxt->input->cur == 0)
704 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 675 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
705 if ((*ctxt->input->cur == 0) && 676 if ((*ctxt->input->cur == 0) &&
706 (ctxt->input->end > ctxt->input->cur)) { 677 (ctxt->input->end > ctxt->input->cur)) {
707 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 678 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
708 "Char 0x0 out of allowed range\n", 0); 679 "Char 0x0 out of allowed range\n", 0);
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
752 { 723 {
753 char buffer[150]; 724 char buffer[150];
754 725
755 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 726 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
756 ctxt->input->cur[0], ctxt->input->cur[1], 727 ctxt->input->cur[0], ctxt->input->cur[1],
757 ctxt->input->cur[2], ctxt->input->cur[3]); 728 ctxt->input->cur[2], ctxt->input->cur[3]);
758 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 729 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
759 "Input is not proper UTF-8, indicate encoding !\n%s", 730 "Input is not proper UTF-8, indicate encoding !\n%s",
760 BAD_CAST buffer, NULL); 731 BAD_CAST buffer, NULL);
761 } 732 }
762 ctxt->charset = XML_CHAR_ENCODING_8859_1; 733 ctxt->charset = XML_CHAR_ENCODING_8859_1;
763 *len = 1; 734 *len = 1;
764 return((int) *ctxt->input->cur); 735 return((int) *ctxt->input->cur);
765 } 736 }
766 737
767 /** 738 /**
768 * xmlStringCurrentChar: 739 * xmlStringCurrentChar:
769 * @ctxt: the XML parser context 740 * @ctxt: the XML parser context
770 * @cur: pointer to the beginning of the char 741 * @cur: pointer to the beginning of the char
771 * @len: pointer to the length of the char read 742 * @len: pointer to the length of the char read
772 * 743 *
773 * The current char value, if using UTF-8 this may actually span multiple 744 * The current char value, if using UTF-8 this may actually span multiple
774 * bytes in the input buffer. 745 * bytes in the input buffer.
775 * 746 *
776 * Returns the current char value and its length 747 * Returns the current char value and its length
777 */ 748 */
778 749
779 int 750 int
780 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 751 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
781 { 752 {
782 if ((len == NULL) || (cur == NULL)) return(0); 753 if ((len == NULL) || (cur == NULL)) return(0);
783 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 754 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
784 /* 755 /*
785 * We are supposed to handle UTF8, check it's valid 756 * We are supposed to handle UTF8, check it's valid
786 * From rfc2044: encoding of the Unicode values on UTF-8: 757 * From rfc2044: encoding of the Unicode values on UTF-8:
787 * 758 *
788 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 759 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
789 * 0000 0000-0000 007F 0xxxxxxx 760 * 0000 0000-0000 007F 0xxxxxxx
790 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 761 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
791 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 762 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
792 * 763 *
793 * Check for the 0x110000 limit too 764 * Check for the 0x110000 limit too
794 */ 765 */
795 unsigned char c; 766 unsigned char c;
796 unsigned int val; 767 unsigned int val;
797 768
798 c = *cur; 769 c = *cur;
799 if (c & 0x80) { 770 if (c & 0x80) {
800 if ((cur[1] & 0xc0) != 0x80) 771 if ((cur[1] & 0xc0) != 0x80)
801 goto encoding_error; 772 goto encoding_error;
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
874 } 845 }
875 *len = 1; 846 *len = 1;
876 return ((int) *cur); 847 return ((int) *cur);
877 } 848 }
878 849
879 /** 850 /**
880 * xmlCopyCharMultiByte: 851 * xmlCopyCharMultiByte:
881 * @out: pointer to an array of xmlChar 852 * @out: pointer to an array of xmlChar
882 * @val: the char value 853 * @val: the char value
883 * 854 *
884 * append the char value in the array 855 * append the char value in the array
885 * 856 *
886 * Returns the number of xmlChar written 857 * Returns the number of xmlChar written
887 */ 858 */
888 int 859 int
889 xmlCopyCharMultiByte(xmlChar *out, int val) { 860 xmlCopyCharMultiByte(xmlChar *out, int val) {
890 if (out == NULL) return(0); 861 if (out == NULL) return(0);
891 /* 862 /*
892 * We are supposed to handle UTF8, check it's valid 863 * We are supposed to handle UTF8, check it's valid
893 * From rfc2044: encoding of the Unicode values on UTF-8: 864 * From rfc2044: encoding of the Unicode values on UTF-8:
894 * 865 *
895 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 866 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
896 * 0000 0000-0000 007F 0xxxxxxx 867 * 0000 0000-0000 007F 0xxxxxxx
897 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 868 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
898 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 869 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
899 */ 870 */
900 if (val >= 0x80) { 871 if (val >= 0x80) {
901 xmlChar *savedout = out; 872 xmlChar *savedout = out;
902 int bits; 873 int bits;
903 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 874 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
904 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 875 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
905 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 876 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
906 else { 877 else {
907 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 878 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
908 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 879 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
909 val); 880 val);
910 return(0); 881 return(0);
911 } 882 }
912 for ( ; bits >= 0; bits-= 6) 883 for ( ; bits >= 0; bits-= 6)
913 *out++= ((val >> bits) & 0x3F) | 0x80 ; 884 *out++= ((val >> bits) & 0x3F) | 0x80 ;
914 return (out - savedout); 885 return (out - savedout);
915 } 886 }
916 *out = (xmlChar) val; 887 *out = (xmlChar) val;
917 return 1; 888 return 1;
918 } 889 }
919 890
920 /** 891 /**
921 * xmlCopyChar: 892 * xmlCopyChar:
922 * @len: Ignored, compatibility 893 * @len: Ignored, compatibility
923 * @out: pointer to an array of xmlChar 894 * @out: pointer to an array of xmlChar
924 * @val: the char value 895 * @val: the char value
925 * 896 *
926 * append the char value in the array 897 * append the char value in the array
927 * 898 *
928 * Returns the number of xmlChar written 899 * Returns the number of xmlChar written
929 */ 900 */
930 901
931 int 902 int
932 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 903 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
933 if (out == NULL) return(0); 904 if (out == NULL) return(0);
934 /* the len parameter is ignored */ 905 /* the len parameter is ignored */
935 if (val >= 0x80) { 906 if (val >= 0x80) {
936 return(xmlCopyCharMultiByte (out, val)); 907 return(xmlCopyCharMultiByte (out, val));
937 } 908 }
938 *out = (xmlChar) val; 909 *out = (xmlChar) val;
939 return 1; 910 return 1;
940 } 911 }
941 912
942 /************************************************************************ 913 /************************************************************************
943 * * 914 * *
944 * Commodity functions to switch encodings * 915 * Commodity functions to switch encodings *
945 * * 916 * *
946 ************************************************************************/ 917 ************************************************************************/
947 918
948 /* defined in encoding.c, not public */
949 int
950 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
951 xmlBufferPtr in, int len);
952
953 static int 919 static int
954 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 920 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
955 xmlCharEncodingHandlerPtr handler, int len); 921 xmlCharEncodingHandlerPtr handler, int len);
956 static int 922 static int
957 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 923 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
958 xmlCharEncodingHandlerPtr handler, int len); 924 xmlCharEncodingHandlerPtr handler, int len);
959 /** 925 /**
960 * xmlSwitchEncoding: 926 * xmlSwitchEncoding:
961 * @ctxt: the parser context 927 * @ctxt: the parser context
962 * @enc: the encoding value (number) 928 * @enc: the encoding value (number)
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after
1182 */ 1148 */
1183 xmlCharEncCloseFunc(input->buf->encoder); 1149 xmlCharEncCloseFunc(input->buf->encoder);
1184 input->buf->encoder = handler; 1150 input->buf->encoder = handler;
1185 return (0); 1151 return (0);
1186 } 1152 }
1187 input->buf->encoder = handler; 1153 input->buf->encoder = handler;
1188 1154
1189 /* 1155 /*
1190 * Is there already some content down the pipe to convert ? 1156 * Is there already some content down the pipe to convert ?
1191 */ 1157 */
1192 if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { 1158 if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1193 int processed; 1159 int processed;
1194 unsigned int use; 1160 unsigned int use;
1195 1161
1196 /* 1162 /*
1197 * Specific handling of the Byte Order Mark for 1163 * Specific handling of the Byte Order Mark for
1198 * UTF-16 1164 * UTF-16
1199 */ 1165 */
1200 if ((handler->name != NULL) && 1166 if ((handler->name != NULL) &&
1201 (!strcmp(handler->name, "UTF-16LE") || 1167 (!strcmp(handler->name, "UTF-16LE") ||
1202 !strcmp(handler->name, "UTF-16")) && 1168 !strcmp(handler->name, "UTF-16")) &&
1203 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1169 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1204 input->cur += 2; 1170 input->cur += 2;
1205 } 1171 }
1206 if ((handler->name != NULL) && 1172 if ((handler->name != NULL) &&
1207 (!strcmp(handler->name, "UTF-16BE")) && 1173 (!strcmp(handler->name, "UTF-16BE")) &&
(...skipping 10 matching lines...) Expand all
1218 (input->cur[0] == 0xEF) && 1184 (input->cur[0] == 0xEF) &&
1219 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1185 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1220 input->cur += 3; 1186 input->cur += 3;
1221 } 1187 }
1222 1188
1223 /* 1189 /*
1224 * Shrink the current input buffer. 1190 * Shrink the current input buffer.
1225 * Move it as the raw buffer and create a new input buffer 1191 * Move it as the raw buffer and create a new input buffer
1226 */ 1192 */
1227 processed = input->cur - input->base; 1193 processed = input->cur - input->base;
1228 xmlBufferShrink(input->buf->buffer, processed); 1194 xmlBufShrink(input->buf->buffer, processed);
1229 input->buf->raw = input->buf->buffer; 1195 input->buf->raw = input->buf->buffer;
1230 input->buf->buffer = xmlBufferCreate(); 1196 input->buf->buffer = xmlBufCreate();
1231 input->buf->rawconsumed = processed; 1197 input->buf->rawconsumed = processed;
1232 » use = input->buf->raw->use; 1198 » use = xmlBufUse(input->buf->raw);
1233 1199
1234 if (ctxt->html) { 1200 if (ctxt->html) {
1235 /* 1201 /*
1236 * convert as much as possible of the buffer 1202 * convert as much as possible of the buffer
1237 */ 1203 */
1238 nbchars = xmlCharEncInFunc(input->buf->encoder, 1204 nbchars = xmlCharEncInput(input->buf, 1);
1239 input->buf->buffer,
1240 input->buf->raw);
1241 } else { 1205 } else {
1242 /* 1206 /*
1243 * convert just enough to get 1207 * convert just enough to get
1244 * '<?xml version="1.0" encoding="xxx"?>' 1208 * '<?xml version="1.0" encoding="xxx"?>'
1245 * parsed with the autodetected encoding 1209 * parsed with the autodetected encoding
1246 * into the parser reading buffer. 1210 * into the parser reading buffer.
1247 */ 1211 */
1248 nbchars = xmlCharEncFirstLineInt(input->buf->encoder, 1212 nbchars = xmlCharEncFirstLineInput(input->buf, len);
1249 input->buf->buffer,
1250 input->buf->raw,
1251 len);
1252 } 1213 }
1253 if (nbchars < 0) { 1214 if (nbchars < 0) {
1254 xmlErrInternal(ctxt, 1215 xmlErrInternal(ctxt,
1255 "switching encoding: encoder error\n", 1216 "switching encoding: encoder error\n",
1256 NULL); 1217 NULL);
1257 return (-1); 1218 return (-1);
1258 } 1219 }
1259 » input->buf->rawconsumed += use - input->buf->raw->use; 1220 » input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1260 input->base = input->cur = input->buf->buffer->content; 1221 xmlBufResetInput(input->buf->buffer, input);
1261 input->end = &input->base[input->buf->buffer->use];
1262
1263 } 1222 }
1264 return (0); 1223 return (0);
1265 } else if (input->length == 0) { 1224 } else if (input->length == 0) {
1266 /* 1225 /*
1267 * When parsing a static memory array one must know the 1226 * When parsing a static memory array one must know the
1268 * size to be able to convert the buffer. 1227 * size to be able to convert the buffer.
1269 */ 1228 */
1270 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1229 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1271 return (-1); 1230 return (-1);
1272 } 1231 }
(...skipping 14 matching lines...) Expand all
1287 int 1246 int
1288 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1247 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1289 xmlCharEncodingHandlerPtr handler) { 1248 xmlCharEncodingHandlerPtr handler) {
1290 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1249 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1291 } 1250 }
1292 1251
1293 /** 1252 /**
1294 * xmlSwitchToEncodingInt: 1253 * xmlSwitchToEncodingInt:
1295 * @ctxt: the parser context 1254 * @ctxt: the parser context
1296 * @handler: the encoding handler 1255 * @handler: the encoding handler
1297 * @len: the lenght to convert or -1 1256 * @len: the length to convert or -1
1298 * 1257 *
1299 * change the input functions when discovering the character encoding 1258 * change the input functions when discovering the character encoding
1300 * of a given entity, and convert only @len bytes of the output, this 1259 * of a given entity, and convert only @len bytes of the output, this
1301 * is needed on auto detect to allow any declared encoding later to 1260 * is needed on auto detect to allow any declared encoding later to
1302 * convert the actual content after the xmlDecl 1261 * convert the actual content after the xmlDecl
1303 * 1262 *
1304 * Returns 0 in case of success, -1 otherwise 1263 * Returns 0 in case of success, -1 otherwise
1305 */ 1264 */
1306 static int 1265 static int
1307 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1266 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
(...skipping 21 matching lines...) Expand all
1329 * xmlSwitchToEncoding: 1288 * xmlSwitchToEncoding:
1330 * @ctxt: the parser context 1289 * @ctxt: the parser context
1331 * @handler: the encoding handler 1290 * @handler: the encoding handler
1332 * 1291 *
1333 * change the input functions when discovering the character encoding 1292 * change the input functions when discovering the character encoding
1334 * of a given entity. 1293 * of a given entity.
1335 * 1294 *
1336 * Returns 0 in case of success, -1 otherwise 1295 * Returns 0 in case of success, -1 otherwise
1337 */ 1296 */
1338 int 1297 int
1339 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1298 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1340 { 1299 {
1341 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1300 return (xmlSwitchToEncodingInt(ctxt, handler, -1));
1342 } 1301 }
1343 1302
1344 /************************************************************************ 1303 /************************************************************************
1345 * * 1304 * *
1346 * Commodity functions to handle entities processing * 1305 * Commodity functions to handle entities processing *
1347 * * 1306 * *
1348 ************************************************************************/ 1307 ************************************************************************/
1349 1308
1350 /** 1309 /**
1351 * xmlFreeInputStream: 1310 * xmlFreeInputStream:
1352 * @input: an xmlParserInputPtr 1311 * @input: an xmlParserInputPtr
1353 * 1312 *
1354 * Free up an input stream. 1313 * Free up an input stream.
1355 */ 1314 */
1356 void 1315 void
1357 xmlFreeInputStream(xmlParserInputPtr input) { 1316 xmlFreeInputStream(xmlParserInputPtr input) {
1358 if (input == NULL) return; 1317 if (input == NULL) return;
1359 1318
1360 if (input->filename != NULL) xmlFree((char *) input->filename); 1319 if (input->filename != NULL) xmlFree((char *) input->filename);
1361 if (input->directory != NULL) xmlFree((char *) input->directory); 1320 if (input->directory != NULL) xmlFree((char *) input->directory);
1362 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1321 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1363 if (input->version != NULL) xmlFree((char *) input->version); 1322 if (input->version != NULL) xmlFree((char *) input->version);
1364 if ((input->free != NULL) && (input->base != NULL)) 1323 if ((input->free != NULL) && (input->base != NULL))
1365 input->free((xmlChar *) input->base); 1324 input->free((xmlChar *) input->base);
1366 if (input->buf != NULL) 1325 if (input->buf != NULL)
1367 xmlFreeParserInputBuffer(input->buf); 1326 xmlFreeParserInputBuffer(input->buf);
1368 xmlFree(input); 1327 xmlFree(input);
1369 } 1328 }
1370 1329
1371 /** 1330 /**
1372 * xmlNewInputStream: 1331 * xmlNewInputStream:
1373 * @ctxt: an XML parser context 1332 * @ctxt: an XML parser context
1374 * 1333 *
1375 * Create a new input stream structure 1334 * Create a new input stream structure.
1335 *
1376 * Returns the new input stream or NULL 1336 * Returns the new input stream or NULL
1377 */ 1337 */
1378 xmlParserInputPtr 1338 xmlParserInputPtr
1379 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1339 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1380 xmlParserInputPtr input; 1340 xmlParserInputPtr input;
1381 static int id = 0;
1382 1341
1383 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1342 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1384 if (input == NULL) { 1343 if (input == NULL) {
1385 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1344 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1386 return(NULL); 1345 return(NULL);
1387 } 1346 }
1388 memset(input, 0, sizeof(xmlParserInput)); 1347 memset(input, 0, sizeof(xmlParserInput));
1389 input->line = 1; 1348 input->line = 1;
1390 input->col = 1; 1349 input->col = 1;
1391 input->standalone = -1; 1350 input->standalone = -1;
1351
1392 /* 1352 /*
1393 * we don't care about thread reentrancy unicity for a single 1353 * If the context is NULL the id cannot be initialized, but that
1394 * parser context (and hence thread) is sufficient. 1354 * should not happen while parsing which is the situation where
1355 * the id is actually needed.
1395 */ 1356 */
1396 input->id = id++; 1357 if (ctxt != NULL)
1358 input->id = ctxt->input_id++;
1359
1397 return(input); 1360 return(input);
1398 } 1361 }
1399 1362
1400 /** 1363 /**
1401 * xmlNewIOInputStream: 1364 * xmlNewIOInputStream:
1402 * @ctxt: an XML parser context 1365 * @ctxt: an XML parser context
1403 * @input: an I/O Input 1366 * @input: an I/O Input
1404 * @enc: the charset encoding if known 1367 * @enc: the charset encoding if known
1405 * 1368 *
1406 * Create a new input stream structure encapsulating the @input into 1369 * Create a new input stream structure encapsulating the @input into
1407 * a stream suitable for the parser. 1370 * a stream suitable for the parser.
1408 * 1371 *
1409 * Returns the new input stream or NULL 1372 * Returns the new input stream or NULL
1410 */ 1373 */
1411 xmlParserInputPtr 1374 xmlParserInputPtr
1412 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1375 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1413 xmlCharEncoding enc) { 1376 xmlCharEncoding enc) {
1414 xmlParserInputPtr inputStream; 1377 xmlParserInputPtr inputStream;
1415 1378
1416 if (input == NULL) return(NULL); 1379 if (input == NULL) return(NULL);
1417 if (xmlParserDebugEntities) 1380 if (xmlParserDebugEntities)
1418 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1381 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1419 inputStream = xmlNewInputStream(ctxt); 1382 inputStream = xmlNewInputStream(ctxt);
1420 if (inputStream == NULL) { 1383 if (inputStream == NULL) {
1421 return(NULL); 1384 return(NULL);
1422 } 1385 }
1423 inputStream->filename = NULL; 1386 inputStream->filename = NULL;
1424 inputStream->buf = input; 1387 inputStream->buf = input;
1425 inputStream->base = inputStream->buf->buffer->content; 1388 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1426 inputStream->cur = inputStream->buf->buffer->content; 1389
1427 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1428 if (enc != XML_CHAR_ENCODING_NONE) { 1390 if (enc != XML_CHAR_ENCODING_NONE) {
1429 xmlSwitchEncoding(ctxt, enc); 1391 xmlSwitchEncoding(ctxt, enc);
1430 } 1392 }
1431 1393
1432 return(inputStream); 1394 return(inputStream);
1433 } 1395 }
1434 1396
1435 /** 1397 /**
1436 * xmlNewEntityInputStream: 1398 * xmlNewEntityInputStream:
1437 * @ctxt: an XML parser context 1399 * @ctxt: an XML parser context
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
1559 } 1521 }
1560 1522
1561 inputStream = xmlNewInputStream(ctxt); 1523 inputStream = xmlNewInputStream(ctxt);
1562 if (inputStream == NULL) 1524 if (inputStream == NULL)
1563 return(NULL); 1525 return(NULL);
1564 1526
1565 inputStream->buf = buf; 1527 inputStream->buf = buf;
1566 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1528 inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1567 if (inputStream == NULL) 1529 if (inputStream == NULL)
1568 return(NULL); 1530 return(NULL);
1569 1531
1570 if (inputStream->filename == NULL) 1532 if (inputStream->filename == NULL)
1571 URI = xmlStrdup((xmlChar *) filename); 1533 URI = xmlStrdup((xmlChar *) filename);
1572 else 1534 else
1573 URI = xmlStrdup((xmlChar *) inputStream->filename); 1535 URI = xmlStrdup((xmlChar *) inputStream->filename);
1574 directory = xmlParserGetDirectory((const char *) URI); 1536 directory = xmlParserGetDirectory((const char *) URI);
1575 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1537 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1576 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1538 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1577 if (URI != NULL) xmlFree((char *) URI); 1539 if (URI != NULL) xmlFree((char *) URI);
1578 inputStream->directory = directory; 1540 inputStream->directory = directory;
1579 1541
1580 inputStream->base = inputStream->buf->buffer->content; 1542 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1581 inputStream->cur = inputStream->buf->buffer->content;
1582 inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1583 if ((ctxt->directory == NULL) && (directory != NULL)) 1543 if ((ctxt->directory == NULL) && (directory != NULL))
1584 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1544 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1585 return(inputStream); 1545 return(inputStream);
1586 } 1546 }
1587 1547
1588 /************************************************************************ 1548 /************************************************************************
1589 * * 1549 * *
1590 * Commodity functions to handle parser contexts * 1550 * Commodity functions to handle parser contexts *
1591 * * 1551 * *
1592 ************************************************************************/ 1552 ************************************************************************/
(...skipping 18 matching lines...) Expand all
1611 } 1571 }
1612 1572
1613 xmlDefaultSAXHandlerInit(); 1573 xmlDefaultSAXHandlerInit();
1614 1574
1615 if (ctxt->dict == NULL) 1575 if (ctxt->dict == NULL)
1616 ctxt->dict = xmlDictCreate(); 1576 ctxt->dict = xmlDictCreate();
1617 if (ctxt->dict == NULL) { 1577 if (ctxt->dict == NULL) {
1618 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1578 xmlErrMemory(NULL, "cannot initialize parser context\n");
1619 return(-1); 1579 return(-1);
1620 } 1580 }
1581 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1582
1621 if (ctxt->sax == NULL) 1583 if (ctxt->sax == NULL)
1622 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1584 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1623 if (ctxt->sax == NULL) { 1585 if (ctxt->sax == NULL) {
1624 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1586 xmlErrMemory(NULL, "cannot initialize parser context\n");
1625 return(-1); 1587 return(-1);
1626 } 1588 }
1627 else 1589 else
1628 xmlSAXVersion(ctxt->sax, 2); 1590 xmlSAXVersion(ctxt->sax, 2);
1629 1591
1630 ctxt->maxatts = 0; 1592 ctxt->maxatts = 0;
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
1722 ctxt->spaceNr = 1; 1684 ctxt->spaceNr = 1;
1723 ctxt->spaceMax = 10; 1685 ctxt->spaceMax = 10;
1724 ctxt->spaceTab[0] = -1; 1686 ctxt->spaceTab[0] = -1;
1725 ctxt->space = &ctxt->spaceTab[0]; 1687 ctxt->space = &ctxt->spaceTab[0];
1726 ctxt->userData = ctxt; 1688 ctxt->userData = ctxt;
1727 ctxt->myDoc = NULL; 1689 ctxt->myDoc = NULL;
1728 ctxt->wellFormed = 1; 1690 ctxt->wellFormed = 1;
1729 ctxt->nsWellFormed = 1; 1691 ctxt->nsWellFormed = 1;
1730 ctxt->valid = 1; 1692 ctxt->valid = 1;
1731 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1693 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1694 if (ctxt->loadsubset) {
1695 ctxt->options |= XML_PARSE_DTDLOAD;
1696 }
1732 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1697 ctxt->validate = xmlDoValidityCheckingDefaultValue;
1733 ctxt->pedantic = xmlPedanticParserDefaultValue; 1698 ctxt->pedantic = xmlPedanticParserDefaultValue;
1699 if (ctxt->pedantic) {
1700 ctxt->options |= XML_PARSE_PEDANTIC;
1701 }
1734 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1702 ctxt->linenumbers = xmlLineNumbersDefaultValue;
1735 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1703 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1736 if (ctxt->keepBlanks == 0) 1704 if (ctxt->keepBlanks == 0) {
1737 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1705 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1706 ctxt->options |= XML_PARSE_NOBLANKS;
1707 }
1738 1708
1739 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1709 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1740 ctxt->vctxt.userData = ctxt; 1710 ctxt->vctxt.userData = ctxt;
1741 ctxt->vctxt.error = xmlParserValidityError; 1711 ctxt->vctxt.error = xmlParserValidityError;
1742 ctxt->vctxt.warning = xmlParserValidityWarning; 1712 ctxt->vctxt.warning = xmlParserValidityWarning;
1743 if (ctxt->validate) { 1713 if (ctxt->validate) {
1744 if (xmlGetWarningsDefaultValue == 0) 1714 if (xmlGetWarningsDefaultValue == 0)
1745 ctxt->vctxt.warning = NULL; 1715 ctxt->vctxt.warning = NULL;
1746 else 1716 else
1747 ctxt->vctxt.warning = xmlParserValidityWarning; 1717 ctxt->vctxt.warning = xmlParserValidityWarning;
1748 ctxt->vctxt.nodeMax = 0; 1718 ctxt->vctxt.nodeMax = 0;
1719 ctxt->options |= XML_PARSE_DTDVALID;
1749 } 1720 }
1750 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1721 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1722 if (ctxt->replaceEntities) {
1723 ctxt->options |= XML_PARSE_NOENT;
1724 }
1751 ctxt->record_info = 0; 1725 ctxt->record_info = 0;
1752 ctxt->nbChars = 0; 1726 ctxt->nbChars = 0;
1753 ctxt->checkIndex = 0; 1727 ctxt->checkIndex = 0;
1754 ctxt->inSubset = 0; 1728 ctxt->inSubset = 0;
1755 ctxt->errNo = XML_ERR_OK; 1729 ctxt->errNo = XML_ERR_OK;
1756 ctxt->depth = 0; 1730 ctxt->depth = 0;
1757 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1731 ctxt->charset = XML_CHAR_ENCODING_UTF8;
1758 ctxt->catalogs = NULL; 1732 ctxt->catalogs = NULL;
1759 ctxt->nbentities = 0; 1733 ctxt->nbentities = 0;
1734 ctxt->sizeentities = 0;
1735 ctxt->sizeentcopy = 0;
1736 ctxt->input_id = 1;
1760 xmlInitNodeInfoSeq(&ctxt->node_seq); 1737 xmlInitNodeInfoSeq(&ctxt->node_seq);
1761 return(0); 1738 return(0);
1762 } 1739 }
1763 1740
1764 /** 1741 /**
1765 * xmlFreeParserCtxt: 1742 * xmlFreeParserCtxt:
1766 * @ctxt: an XML parser context 1743 * @ctxt: an XML parser context
1767 * 1744 *
1768 * Free all the memory used by a parser context. However the parsed 1745 * Free all the memory used by a parser context. However the parsed
1769 * document in ctxt->myDoc is not freed. 1746 * document in ctxt->myDoc is not freed.
(...skipping 25 matching lines...) Expand all
1795 if (ctxt->sax != NULL) 1772 if (ctxt->sax != NULL)
1796 #endif /* LIBXML_SAX1_ENABLED */ 1773 #endif /* LIBXML_SAX1_ENABLED */
1797 xmlFree(ctxt->sax); 1774 xmlFree(ctxt->sax);
1798 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1775 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1799 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1776 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1800 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1777 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1801 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1778 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1802 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1779 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1803 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1780 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1804 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1781 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1805 if (ctxt->attsDefault != NULL) 1782 if (ctxt->attsDefault != NULL)
1806 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1783 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1807 if (ctxt->attsSpecial != NULL) 1784 if (ctxt->attsSpecial != NULL)
1808 xmlHashFree(ctxt->attsSpecial, NULL); 1785 xmlHashFree(ctxt->attsSpecial, NULL);
1809 if (ctxt->freeElems != NULL) { 1786 if (ctxt->freeElems != NULL) {
1810 xmlNodePtr cur, next; 1787 xmlNodePtr cur, next;
1811 1788
1812 cur = ctxt->freeElems; 1789 cur = ctxt->freeElems;
1813 while (cur != NULL) { 1790 while (cur != NULL) {
1814 next = cur->next; 1791 next = cur->next;
1815 xmlFree(cur); 1792 xmlFree(cur);
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
1895 xmlCtxtReset(ctxt); 1872 xmlCtxtReset(ctxt);
1896 } 1873 }
1897 1874
1898 1875
1899 /** 1876 /**
1900 * xmlParserFindNodeInfo: 1877 * xmlParserFindNodeInfo:
1901 * @ctx: an XML parser context 1878 * @ctx: an XML parser context
1902 * @node: an XML node within the tree 1879 * @node: an XML node within the tree
1903 * 1880 *
1904 * Find the parser node info struct for a given node 1881 * Find the parser node info struct for a given node
1905 * 1882 *
1906 * Returns an xmlParserNodeInfo block pointer or NULL 1883 * Returns an xmlParserNodeInfo block pointer or NULL
1907 */ 1884 */
1908 const xmlParserNodeInfo * 1885 const xmlParserNodeInfo *
1909 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1886 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1910 { 1887 {
1911 unsigned long pos; 1888 unsigned long pos;
1912 1889
1913 if ((ctx == NULL) || (node == NULL)) 1890 if ((ctx == NULL) || (node == NULL))
1914 return (NULL); 1891 return (NULL);
1915 /* Find position where node should be at */ 1892 /* Find position where node should be at */
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1953 if (seq->buffer != NULL) 1930 if (seq->buffer != NULL)
1954 xmlFree(seq->buffer); 1931 xmlFree(seq->buffer);
1955 xmlInitNodeInfoSeq(seq); 1932 xmlInitNodeInfoSeq(seq);
1956 } 1933 }
1957 1934
1958 /** 1935 /**
1959 * xmlParserFindNodeInfoIndex: 1936 * xmlParserFindNodeInfoIndex:
1960 * @seq: a node info sequence pointer 1937 * @seq: a node info sequence pointer
1961 * @node: an XML node pointer 1938 * @node: an XML node pointer
1962 * 1939 *
1963 * 1940 *
1964 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1941 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1965 * the given node is or should be at in a sorted sequence 1942 * the given node is or should be at in a sorted sequence
1966 * 1943 *
1967 * Returns a long indicating the position of the record 1944 * Returns a long indicating the position of the record
1968 */ 1945 */
1969 unsigned long 1946 unsigned long
1970 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1947 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1971 const xmlNodePtr node) 1948 const xmlNodePtr node)
1972 { 1949 {
1973 unsigned long upper, lower, middle; 1950 unsigned long upper, lower, middle;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
2010 const xmlParserNodeInfoPtr info) 1987 const xmlParserNodeInfoPtr info)
2011 { 1988 {
2012 unsigned long pos; 1989 unsigned long pos;
2013 1990
2014 if ((ctxt == NULL) || (info == NULL)) return; 1991 if ((ctxt == NULL) || (info == NULL)) return;
2015 1992
2016 /* Find pos and check to see if node is already in the sequence */ 1993 /* Find pos and check to see if node is already in the sequence */
2017 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 1994 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2018 info->node); 1995 info->node);
2019 1996
2020 if ((pos < ctxt->node_seq.length) && 1997 if ((pos < ctxt->node_seq.length) &&
2021 (ctxt->node_seq.buffer != NULL) && 1998 (ctxt->node_seq.buffer != NULL) &&
2022 (ctxt->node_seq.buffer[pos].node == info->node)) { 1999 (ctxt->node_seq.buffer[pos].node == info->node)) {
2023 ctxt->node_seq.buffer[pos] = *info; 2000 ctxt->node_seq.buffer[pos] = *info;
2024 } 2001 }
2025 2002
2026 /* Otherwise, we need to add new node to buffer */ 2003 /* Otherwise, we need to add new node to buffer */
2027 else { 2004 else {
2028 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 2005 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2006 » (ctxt->node_seq.buffer == NULL)) {
2029 xmlParserNodeInfo *tmp_buffer; 2007 xmlParserNodeInfo *tmp_buffer;
2030 unsigned int byte_size; 2008 unsigned int byte_size;
2031 2009
2032 if (ctxt->node_seq.maximum == 0) 2010 if (ctxt->node_seq.maximum == 0)
2033 ctxt->node_seq.maximum = 2; 2011 ctxt->node_seq.maximum = 2;
2034 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2012 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2035 (2 * ctxt->node_seq.maximum)); 2013 (2 * ctxt->node_seq.maximum));
2036 2014
2037 if (ctxt->node_seq.buffer == NULL) 2015 if (ctxt->node_seq.buffer == NULL)
2038 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2016 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
(...skipping 24 matching lines...) Expand all
2063 } 2041 }
2064 } 2042 }
2065 2043
2066 /************************************************************************ 2044 /************************************************************************
2067 * * 2045 * *
2068 * Defaults settings * 2046 * Defaults settings *
2069 * * 2047 * *
2070 ************************************************************************/ 2048 ************************************************************************/
2071 /** 2049 /**
2072 * xmlPedanticParserDefault: 2050 * xmlPedanticParserDefault:
2073 * @val: int 0 or 1 2051 * @val: int 0 or 1
2074 * 2052 *
2075 * Set and return the previous value for enabling pedantic warnings. 2053 * Set and return the previous value for enabling pedantic warnings.
2076 * 2054 *
2077 * Returns the previous value: 0 if pedantic warnings were disabled, 1 if enabled. 2055 * Returns the previous value: 0 if pedantic warnings were disabled, 1 if enabled.
2078 */ 2056 */
2079 2057
2080 int 2058 int
2081 xmlPedanticParserDefault(int val) { 2059 xmlPedanticParserDefault(int val) {
2082 int old = xmlPedanticParserDefaultValue; 2060 int old = xmlPedanticParserDefaultValue;
2083 2061
2084 xmlPedanticParserDefaultValue = val; 2062 xmlPedanticParserDefaultValue = val;
2085 return(old); 2063 return(old);
2086 } 2064 }
2087 2065
2088 /** 2066 /**
2089 * xmlLineNumbersDefault: 2067 * xmlLineNumbersDefault:
2090 * @val: int 0 or 1 2068 * @val: int 0 or 1
2091 * 2069 *
2092 * Set and return the previous value for enabling line numbers in elements 2070 * Set and return the previous value for enabling line numbers in elements
2093 * contents. This may break old applications and is turned off by default. 2071 * contents. This may break old applications and is turned off by default.
2094 * 2072 *
2095 * Returns the previous value: 0 if line numbers were disabled, 1 if enabled. 2073 * Returns the previous value: 0 if line numbers were disabled, 1 if enabled.
2096 */ 2074 */
2097 2075
2098 int 2076 int
2099 xmlLineNumbersDefault(int val) { 2077 xmlLineNumbersDefault(int val) {
2100 int old = xmlLineNumbersDefaultValue; 2078 int old = xmlLineNumbersDefaultValue;
2101 2079
2102 xmlLineNumbersDefaultValue = val; 2080 xmlLineNumbersDefaultValue = val;
2103 return(old); 2081 return(old);
2104 } 2082 }
2105 2083
2106 /** 2084 /**
2107 * xmlSubstituteEntitiesDefault: 2085 * xmlSubstituteEntitiesDefault:
2108 * @val: int 0 or 1 2086 * @val: int 0 or 1
2109 * 2087 *
2110 * Set and return the previous value for default entity support. 2088 * Set and return the previous value for default entity support.
2111 * Initially the parser always keeps entity references instead of substituting 2089 * Initially the parser always keeps entity references instead of substituting
2112 * entity values in the output. This function has to be used to change the 2090 * entity values in the output. This function has to be used to change the
2113 * default parser behavior 2091 * default parser behavior
2114 * SAX::substituteEntities() has to be used for changing that on a file by 2092 * SAX::substituteEntities() has to be used for changing that on a file by
2115 * file basis. 2093 * file basis.
2116 * 2094 *
2117 * Returns the previous value: 0 for no substitution, 1 for substitution. 2095 * Returns the previous value: 0 for no substitution, 1 for substitution.
2118 */ 2096 */
2119 2097
2120 int 2098 int
2121 xmlSubstituteEntitiesDefault(int val) { 2099 xmlSubstituteEntitiesDefault(int val) {
2122 int old = xmlSubstituteEntitiesDefaultValue; 2100 int old = xmlSubstituteEntitiesDefaultValue;
2123 2101
2124 xmlSubstituteEntitiesDefaultValue = val; 2102 xmlSubstituteEntitiesDefaultValue = val;
2125 return(old); 2103 return(old);
2126 } 2104 }
2127 2105
2128 /** 2106 /**
2129 * xmlKeepBlanksDefault: 2107 * xmlKeepBlanksDefault:
2130 * @val: int 0 or 1 2108 * @val: int 0 or 1
2131 * 2109 *
2132 * Set and return the previous value for default blanks text nodes support. 2110 * Set and return the previous value for default blanks text nodes support.
2133 * The 1.x version of the parser used a heuristic to try to detect 2111 * The 1.x version of the parser used a heuristic to try to detect
2134 * ignorable white spaces. As a result the SAX callback was generating 2112 * ignorable white spaces. As a result the SAX callback was generating
2135 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2113 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2136 * using the DOM output text nodes containing those blanks were not generated. 2114 * using the DOM output text nodes containing those blanks were not generated.
2137 * The 2.x and later version will switch to the XML standard way and 2115 * The 2.x and later version will switch to the XML standard way and
2138 * ignorableWhitespace() are only generated when running the parser in 2116 * ignorableWhitespace() are only generated when running the parser in
2139 * validating mode and when the current element doesn't allow CDATA or 2117 * validating mode and when the current element doesn't allow CDATA or
2140 * mixed content. 2118 * mixed content.
2141 * This function is provided as a way to force the standard behavior 2119 * This function is provided as a way to force the standard behavior
2142 * on 1.X libs and to switch back to the old mode for compatibility when 2120 * on 1.X libs and to switch back to the old mode for compatibility when
2143 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2121 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2144 * by using xmlIsBlankNode() commodity function to detect the "empty" 2122 * by using xmlIsBlankNode() commodity function to detect the "empty"
2145 * nodes generated. 2123 * nodes generated.
2146 * This value also affects autogeneration of indentation when saving code: 2124 * This value also affects autogeneration of indentation when saving code:
2147 * if blank sections are kept, indentation is not generated. 2125 * if blank sections are kept, indentation is not generated.
2148 * 2126 *
2149 * Returns the previous value: 0 if blanks were not kept, 1 if kept. 2127 * Returns the previous value: 0 if blanks were not kept, 1 if kept.
2150 */ 2128 */
2151 2129
2152 int 2130 int
2153 xmlKeepBlanksDefault(int val) { 2131 xmlKeepBlanksDefault(int val) {
2154 int old = xmlKeepBlanksDefaultValue; 2132 int old = xmlKeepBlanksDefaultValue;
2155 2133
2156 xmlKeepBlanksDefaultValue = val; 2134 xmlKeepBlanksDefaultValue = val;
2157 if (!val) xmlIndentTreeOutput = 1; 2135 if (!val) xmlIndentTreeOutput = 1;
2158 return(old); 2136 return(old);
2159 } 2137 }
2160 2138
2161 #define bottom_parserInternals 2139 #define bottom_parserInternals
2162 #include "elfgcchack.h" 2140 #include "elfgcchack.h"
OLDNEW
« no previous file with comments | « third_party/libxml/src/parser.c ('k') | third_party/libxml/src/pattern.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698