Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Side by Side Diff: third_party/libxml/src/HTMLtree.c

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: no iconv Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libxml/src/HTMLparser.c ('k') | third_party/libxml/src/INSTALL » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * HTMLtree.c : implementation of access function for an HTML tree. 2 * HTMLtree.c : implementation of access function for an HTML tree.
3 * 3 *
4 * See Copyright for the status of this software. 4 * See Copyright for the status of this software.
5 * 5 *
6 * daniel@veillard.com 6 * daniel@veillard.com
7 */ 7 */
8 8
9 9
10 #define IN_LIBXML 10 #define IN_LIBXML
(...skipping 12 matching lines...) Expand all
23 #include <libxml/xmlmemory.h> 23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h> 24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h> 25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h> 26 #include <libxml/entities.h>
27 #include <libxml/valid.h> 27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h> 28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h> 29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h> 30 #include <libxml/globals.h>
31 #include <libxml/uri.h> 31 #include <libxml/uri.h>
32 32
33 #include "buf.h"
34
33 /************************************************************************ 35 /************************************************************************
34 * * 36 * *
35 * » » Getting/Setting encoding meta tags» » » * 37 *» » Getting/Setting encoding meta tags» » » *
36 * * 38 * *
37 ************************************************************************/ 39 ************************************************************************/
38 40
39 /** 41 /**
40 * htmlGetMetaEncoding: 42 * htmlGetMetaEncoding:
41 * @doc: the document 43 * @doc: the document
42 * 44 *
43 * Encoding definition lookup in the Meta tags 45 * Encoding definition lookup in the Meta tags
44 * 46 *
45 * Returns the current encoding as flagged in the HTML source 47 * Returns the current encoding as flagged in the HTML source
46 */ 48 */
47 const xmlChar * 49 const xmlChar *
48 htmlGetMetaEncoding(htmlDocPtr doc) { 50 htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur; 51 htmlNodePtr cur;
50 const xmlChar *content; 52 const xmlChar *content;
51 const xmlChar *encoding; 53 const xmlChar *encoding;
52 54
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 attr = attr->next; 121 attr = attr->next;
120 } 122 }
121 } 123 }
122 } 124 }
123 cur = cur->next; 125 cur = cur->next;
124 } 126 }
125 return(NULL); 127 return(NULL);
126 128
127 found_content: 129 found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset="); 130 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL) 131 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset="); 132 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL) 133 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET="); 134 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) { 135 if (encoding != NULL) {
134 encoding += 8; 136 encoding += 8;
135 } else { 137 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset ="); 138 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 » if (encoding == NULL) 139 » if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset ="); 140 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 » if (encoding == NULL) 141 » if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); 142 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL) 143 if (encoding != NULL)
142 encoding += 9; 144 encoding += 9;
143 } 145 }
144 if (encoding != NULL) { 146 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; 147 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 } 148 }
147 return(encoding); 149 return(encoding);
148 } 150 }
149 151
150 /** 152 /**
151 * htmlSetMetaEncoding: 153 * htmlSetMetaEncoding:
152 * @doc: the document 154 * @doc: the document
153 * @encoding: the encoding string 155 * @encoding: the encoding string
154 * 156 *
155 * Sets the current encoding in the Meta tags 157 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just 158 * NOTE: this will not change the document content encoding, just
157 * the META flag associated. 159 * the META flag associated.
158 * 160 *
159 * Returns 0 in case of success and -1 in case of error 161 * Returns 0 in case of success and -1 in case of error
160 */ 162 */
161 int 163 int
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { 164 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta = NULL, head = NULL; 165 htmlNodePtr cur, meta = NULL, head = NULL;
164 const xmlChar *content = NULL; 166 const xmlChar *content = NULL;
165 char newcontent[100]; 167 char newcontent[100];
166 168
169 newcontent[0] = 0;
167 170
168 if (doc == NULL) 171 if (doc == NULL)
169 return(-1); 172 return(-1);
170 173
171 /* html isn't a real encoding; it's just libxml2's way to get entities */ 174 /* html isn't a real encoding; it's just libxml2's way to get entities */
172 if (!xmlStrcasecmp(encoding, BAD_CAST "html")) 175 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
173 return(-1); 176 return(-1);
174 177
175 if (encoding != NULL) { 178 if (encoding != NULL) {
176 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", 179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
237 while (attr != NULL) { 240 while (attr != NULL) {
238 if ((attr->children != NULL) && 241 if ((attr->children != NULL) &&
239 (attr->children->type == XML_TEXT_NODE) && 242 (attr->children->type == XML_TEXT_NODE) &&
240 (attr->children->next == NULL)) { 243 (attr->children->next == NULL)) {
241 value = attr->children->content; 244 value = attr->children->content;
242 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) 245 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
243 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) 246 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
244 http = 1; 247 http = 1;
245 else 248 else
246 { 249 {
247 if ((value != NULL) && 250 if ((value != NULL) &&
248 (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) 251 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
249 content = value; 252 content = value;
250 } 253 }
251 if ((http != 0) && (content != NULL)) 254 if ((http != 0) && (content != NULL))
252 break; 255 break;
253 } 256 }
254 attr = attr->next; 257 attr = attr->next;
255 } 258 }
256 if ((http != 0) && (content != NULL)) { 259 if ((http != 0) && (content != NULL)) {
257 meta = cur; 260 meta = cur;
(...skipping 13 matching lines...) Expand all
271 274
272 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); 275 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
273 if (head->children == NULL) 276 if (head->children == NULL)
274 xmlAddChild(head, meta); 277 xmlAddChild(head, meta);
275 else 278 else
276 xmlAddPrevSibling(head->children, meta); 279 xmlAddPrevSibling(head->children, meta);
277 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); 280 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
278 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); 281 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
279 } 282 }
280 } else { 283 } else {
284 /* remove the meta tag if NULL is passed */
285 if (encoding == NULL) {
286 xmlUnlinkNode(meta);
287 xmlFreeNode(meta);
288 }
281 /* change the document only if there is a real encoding change */ 289 /* change the document only if there is a real encoding change */
282 if (xmlStrcasestr(content, encoding) == NULL) { 290 else if (xmlStrcasestr(content, encoding) == NULL) {
283 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent); 291 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
284 } 292 }
285 } 293 }
286 294
287 295
288 return(0); 296 return(0);
289 } 297 }
290 298
291 /** 299 /**
292 * booleanHTMLAttrs: 300 * booleanHTMLAttrs:
293 * 301 *
294 * These are the HTML attributes which will be output 302 * These are the HTML attributes which will be output
295 * in minimized form, i.e. <option selected="selected"> will be 303 * in minimized form, i.e. <option selected="selected"> will be
296 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method" 304 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
297 * 305 *
298 */ 306 */
299 static const char* htmlBooleanAttrs[] = { 307 static const char* htmlBooleanAttrs[] = {
300 "checked", "compact", "declare", "defer", "disabled", "ismap", 308 "checked", "compact", "declare", "defer", "disabled", "ismap",
301 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", 309 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
302 "selected", NULL 310 "selected", NULL
303 }; 311 };
304 312
305 313
306 /** 314 /**
307 * htmlIsBooleanAttr: 315 * htmlIsBooleanAttr:
308 * @name: the name of the attribute to check 316 * @name: the name of the attribute to check
309 * 317 *
310 * Determine if a given attribute is a boolean attribute. 318 * Determine if a given attribute is a boolean attribute.
311 * 319 *
312 * returns: false if the attribute is not boolean, true otherwise. 320 * returns: false if the attribute is not boolean, true otherwise.
313 */ 321 */
314 int 322 int
315 htmlIsBooleanAttr(const xmlChar *name) 323 htmlIsBooleanAttr(const xmlChar *name)
316 { 324 {
317 int i = 0; 325 int i = 0;
318 326
319 while (htmlBooleanAttrs[i] != NULL) { 327 while (htmlBooleanAttrs[i] != NULL) {
320 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) 328 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
321 return 1; 329 return 1;
322 i++; 330 i++;
323 } 331 }
324 return 0; 332 return 0;
325 } 333 }
326 334
327 #ifdef LIBXML_OUTPUT_ENABLED 335 #ifdef LIBXML_OUTPUT_ENABLED
328 /* 336 /*
329 * private routine exported from xmlIO.c 337 * private routine exported from xmlIO.c
330 */ 338 */
331 xmlOutputBufferPtr 339 xmlOutputBufferPtr
332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); 340 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
333 /************************************************************************ 341 /************************************************************************
334 * * 342 * *
335 * » » » Output error handlers» » » » * 343 *» » » Output error handlers» » » » *
336 * * 344 * *
337 ************************************************************************/ 345 ************************************************************************/
338 /** 346 /**
339 * htmlSaveErrMemory: 347 * htmlSaveErrMemory:
340 * @extra: extra information 348 * @extra: extra information
341 * 349 *
342 * Handle an out of memory condition 350 * Handle an out of memory condition
343 */ 351 */
344 static void 352 static void
345 htmlSaveErrMemory(const char *extra) 353 htmlSaveErrMemory(const char *extra)
(...skipping 28 matching lines...) Expand all
374 msg = "HTML has no DOCTYPE\n"; 382 msg = "HTML has no DOCTYPE\n";
375 break; 383 break;
376 default: 384 default:
377 msg = "unexpected error number\n"; 385 msg = "unexpected error number\n";
378 } 386 }
379 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); 387 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
380 } 388 }
381 389
382 /************************************************************************ 390 /************************************************************************
383 * * 391 * *
384 * » » Dumping HTML tree content to a simple buffer» » * 392 *» » Dumping HTML tree content to a simple buffer» » *
385 * * 393 * *
386 ************************************************************************/ 394 ************************************************************************/
387 395
388 static int
389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
390 int format);
391
392 /** 396 /**
393 * htmlNodeDumpFormat: 397 * htmlBufNodeDumpFormat:
394 * @buf: the HTML buffer output 398 * @buf: the xmlBufPtr output
395 * @doc: the document 399 * @doc: the document
396 * @cur: the current node 400 * @cur: the current node
397 * @format: should formatting spaces be added 401 * @format: should formatting spaces be added
398 * 402 *
399 * Dump an HTML node, recursive behaviour,children are printed too. 403 * Dump an HTML node, recursive behaviour,children are printed too.
400 * 404 *
401 * Returns the number of bytes written or -1 in case of error 405 * Returns the number of bytes written or -1 in case of error
402 */ 406 */
403 static int 407 static size_t
404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, 408 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
405 int format) { 409 int format) {
406 unsigned int use; 410 size_t use;
407 int ret; 411 int ret;
408 xmlOutputBufferPtr outbuf; 412 xmlOutputBufferPtr outbuf;
409 413
410 if (cur == NULL) { 414 if (cur == NULL) {
411 return (-1); 415 return (-1);
412 } 416 }
413 if (buf == NULL) { 417 if (buf == NULL) {
414 return (-1); 418 return (-1);
415 } 419 }
416 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); 420 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
417 if (outbuf == NULL) { 421 if (outbuf == NULL) {
418 htmlSaveErrMemory("allocating HTML output buffer"); 422 htmlSaveErrMemory("allocating HTML output buffer");
419 return (-1); 423 return (-1);
420 } 424 }
421 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); 425 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
422 outbuf->buffer = buf; 426 outbuf->buffer = buf;
423 outbuf->encoder = NULL; 427 outbuf->encoder = NULL;
424 outbuf->writecallback = NULL; 428 outbuf->writecallback = NULL;
425 outbuf->closecallback = NULL; 429 outbuf->closecallback = NULL;
426 outbuf->context = NULL; 430 outbuf->context = NULL;
427 outbuf->written = 0; 431 outbuf->written = 0;
428 432
429 use = buf->use; 433 use = xmlBufUse(buf);
430 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); 434 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
431 xmlFree(outbuf); 435 xmlFree(outbuf);
432 ret = buf->use - use; 436 ret = xmlBufUse(buf) - use;
433 return (ret); 437 return (ret);
434 } 438 }
435 439
436 /** 440 /**
437 * htmlNodeDump: 441 * htmlNodeDump:
438 * @buf: the HTML buffer output 442 * @buf: the HTML buffer output
439 * @doc: the document 443 * @doc: the document
440 * @cur: the current node 444 * @cur: the current node
441 * 445 *
442 * Dump an HTML node, recursive behaviour,children are printed too, 446 * Dump an HTML node, recursive behaviour,children are printed too,
443 * and formatting returns are added. 447 * and formatting returns are added.
444 * 448 *
445 * Returns the number of bytes written or -1 in case of error 449 * Returns the number of bytes written or -1 in case of error
446 */ 450 */
447 int 451 int
448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { 452 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
453 xmlBufPtr buffer;
454 size_t ret;
455
456 if ((buf == NULL) || (cur == NULL))
457 return(-1);
458
449 xmlInitParser(); 459 xmlInitParser();
460 buffer = xmlBufFromBuffer(buf);
461 if (buffer == NULL)
462 return(-1);
450 463
451 return(htmlNodeDumpFormat(buf, doc, cur, 1)); 464 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
465
466 xmlBufBackToBuffer(buffer);
467
468 if (ret > INT_MAX)
469 return(-1);
470 return((int) ret);
452 } 471 }
453 472
454 /** 473 /**
455 * htmlNodeDumpFileFormat: 474 * htmlNodeDumpFileFormat:
456 * @out: the FILE pointer 475 * @out: the FILE pointer
457 * @doc: the document 476 * @doc: the document
458 * @cur: the current node 477 * @cur: the current node
459 * @encoding: the document encoding 478 * @encoding: the document encoding
460 * @format: should formatting spaces be added 479 * @format: should formatting spaces be added
461 * 480 *
(...skipping 12 matching lines...) Expand all
474 493
475 xmlInitParser(); 494 xmlInitParser();
476 495
477 if (encoding != NULL) { 496 if (encoding != NULL) {
478 xmlCharEncoding enc; 497 xmlCharEncoding enc;
479 498
480 enc = xmlParseCharEncoding(encoding); 499 enc = xmlParseCharEncoding(encoding);
481 if (enc != XML_CHAR_ENCODING_UTF8) { 500 if (enc != XML_CHAR_ENCODING_UTF8) {
482 handler = xmlFindCharEncodingHandler(encoding); 501 handler = xmlFindCharEncodingHandler(encoding);
483 if (handler == NULL) 502 if (handler == NULL)
484 » » return(-1); 503 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
485 } 504 }
486 } 505 }
487 506
488 /* 507 /*
489 * Fallback to HTML or ASCII when the encoding is unspecified 508 * Fallback to HTML or ASCII when the encoding is unspecified
490 */ 509 */
491 if (handler == NULL) 510 if (handler == NULL)
492 handler = xmlFindCharEncodingHandler("HTML"); 511 handler = xmlFindCharEncodingHandler("HTML");
493 if (handler == NULL) 512 if (handler == NULL)
494 handler = xmlFindCharEncodingHandler("ascii"); 513 handler = xmlFindCharEncodingHandler("ascii");
495 514
496 /* 515 /*
497 * save the content to a temp buffer. 516 * save the content to a temp buffer.
498 */ 517 */
499 buf = xmlOutputBufferCreateFile(out, handler); 518 buf = xmlOutputBufferCreateFile(out, handler);
500 if (buf == NULL) return(0); 519 if (buf == NULL) return(0);
501 520
502 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); 521 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
503 522
504 ret = xmlOutputBufferClose(buf); 523 ret = xmlOutputBufferClose(buf);
505 return(ret); 524 return(ret);
506 } 525 }
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
555 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 574 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
556 /* 575 /*
557 * Not supported yet 576 * Not supported yet
558 */ 577 */
559 *mem = NULL; 578 *mem = NULL;
560 *size = 0; 579 *size = 0;
561 return; 580 return;
562 } 581 }
563 582
564 handler = xmlFindCharEncodingHandler(encoding); 583 handler = xmlFindCharEncodingHandler(encoding);
565 » if (handler == NULL) { 584 » if (handler == NULL)
566 » » *mem = NULL; 585 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
567 » » *size = 0; 586
568 » » return;
569 » }
570 } else { 587 } else {
571 handler = xmlFindCharEncodingHandler(encoding); 588 handler = xmlFindCharEncodingHandler(encoding);
572 } 589 }
573 } 590 }
574 591
575 /* 592 /*
576 * Fallback to HTML or ASCII when the encoding is unspecified 593 * Fallback to HTML or ASCII when the encoding is unspecified
577 */ 594 */
578 if (handler == NULL) 595 if (handler == NULL)
579 handler = xmlFindCharEncodingHandler("HTML"); 596 handler = xmlFindCharEncodingHandler("HTML");
580 if (handler == NULL) 597 if (handler == NULL)
581 handler = xmlFindCharEncodingHandler("ascii"); 598 handler = xmlFindCharEncodingHandler("ascii");
582 599
583 buf = xmlAllocOutputBufferInternal(handler); 600 buf = xmlAllocOutputBufferInternal(handler);
584 if (buf == NULL) { 601 if (buf == NULL) {
585 *mem = NULL; 602 *mem = NULL;
586 *size = 0; 603 *size = 0;
587 return; 604 return;
588 } 605 }
589 606
590 » htmlDocContentDumpFormatOutput(buf, cur, NULL, format); 607 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
591 608
592 xmlOutputBufferFlush(buf); 609 xmlOutputBufferFlush(buf);
593 if (buf->conv != NULL) { 610 if (buf->conv != NULL) {
594 » *size = buf->conv->use; 611 » *size = xmlBufUse(buf->conv);
595 » *mem = xmlStrndup(buf->conv->content, *size); 612 » *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
596 } else { 613 } else {
597 » *size = buf->buffer->use; 614 » *size = xmlBufUse(buf->buffer);
598 » *mem = xmlStrndup(buf->buffer->content, *size); 615 » *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
599 } 616 }
600 (void)xmlOutputBufferClose(buf); 617 (void)xmlOutputBufferClose(buf);
601 } 618 }
602 619
603 /** 620 /**
604 * htmlDocDumpMemory: 621 * htmlDocDumpMemory:
605 * @cur: the document 622 * @cur: the document
606 * @mem: OUT: the memory pointer 623 * @mem: OUT: the memory pointer
607 * @size: OUT: the memory length 624 * @size: OUT: the memory length
608 * 625 *
609 * Dump an HTML document in memory and return the xmlChar * and its size. 626 * Dump an HTML document in memory and return the xmlChar * and its size.
610 * It's up to the caller to free the memory. 627 * It's up to the caller to free the memory.
611 */ 628 */
612 void 629 void
613 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { 630 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
614 htmlDocDumpMemoryFormat(cur, mem, size, 1); 631 htmlDocDumpMemoryFormat(cur, mem, size, 1);
615 } 632 }
616 633
617 634
618 /************************************************************************ 635 /************************************************************************
619 * * 636 * *
620 * » » Dumping HTML tree content to an I/O output buffer» * 637 *» » Dumping HTML tree content to an I/O output buffer» *
621 * * 638 * *
622 ************************************************************************/ 639 ************************************************************************/
623 640
624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); 641 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
625 642
626 /** 643 /**
627 * htmlDtdDumpOutput: 644 * htmlDtdDumpOutput:
628 * @buf: the HTML buffer output 645 * @buf: the HTML buffer output
629 * @doc: the document 646 * @doc: the document
630 * @encoding: the encoding string 647 * @encoding: the encoding string
631 * 648 *
632 * TODO: check whether encoding is needed 649 * TODO: check whether encoding is needed
633 * 650 *
634 * Dump the HTML document DTD, if any. 651 * Dump the HTML document DTD, if any.
635 */ 652 */
636 static void 653 static void
637 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, 654 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
638 const char *encoding ATTRIBUTE_UNUSED) { 655 const char *encoding ATTRIBUTE_UNUSED) {
639 xmlDtdPtr cur = doc->intSubset; 656 xmlDtdPtr cur = doc->intSubset;
640 657
641 if (cur == NULL) { 658 if (cur == NULL) {
642 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); 659 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
643 return; 660 return;
644 } 661 }
645 xmlOutputBufferWriteString(buf, "<!DOCTYPE "); 662 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
646 xmlOutputBufferWriteString(buf, (const char *)cur->name); 663 xmlOutputBufferWriteString(buf, (const char *)cur->name);
647 if (cur->ExternalID != NULL) { 664 if (cur->ExternalID != NULL) {
648 xmlOutputBufferWriteString(buf, " PUBLIC "); 665 xmlOutputBufferWriteString(buf, " PUBLIC ");
649 » xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); 666 » xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
650 if (cur->SystemID != NULL) { 667 if (cur->SystemID != NULL) {
651 xmlOutputBufferWriteString(buf, " "); 668 xmlOutputBufferWriteString(buf, " ");
652 » xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); 669 » xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
653 » } 670 » }
654 } else if (cur->SystemID != NULL) { 671 } else if (cur->SystemID != NULL) {
655 xmlOutputBufferWriteString(buf, " SYSTEM "); 672 xmlOutputBufferWriteString(buf, " SYSTEM ");
656 » xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); 673 » xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
657 } 674 }
658 xmlOutputBufferWriteString(buf, ">\n"); 675 xmlOutputBufferWriteString(buf, ">\n");
659 } 676 }
660 677
661 /** 678 /**
662 * htmlAttrDumpOutput: 679 * htmlAttrDumpOutput:
663 * @buf: the HTML buffer output 680 * @buf: the HTML buffer output
664 * @doc: the document 681 * @doc: the document
665 * @cur: the attribute pointer 682 * @cur: the attribute pointer
666 * @encoding: the encoding string 683 * @encoding: the encoding string
667 * 684 *
668 * Dump an HTML attribute 685 * Dump an HTML attribute
669 */ 686 */
670 static void 687 static void
671 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, 688 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
672 const char *encoding ATTRIBUTE_UNUSED) { 689 const char *encoding ATTRIBUTE_UNUSED) {
673 xmlChar *value; 690 xmlChar *value;
674 691
675 /* 692 /*
676 * TODO: The html output method should not escape a & character 693 * The html output method should not escape a & character
677 * occurring in an attribute value immediately followed by 694 * occurring in an attribute value immediately followed by
678 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). 695 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
696 * This is implemented in xmlEncodeEntitiesReentrant
679 */ 697 */
680 698
681 if (cur == NULL) { 699 if (cur == NULL) {
682 return; 700 return;
683 } 701 }
684 xmlOutputBufferWriteString(buf, " "); 702 xmlOutputBufferWriteString(buf, " ");
685 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { 703 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
686 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); 704 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
687 xmlOutputBufferWriteString(buf, ":"); 705 xmlOutputBufferWriteString(buf, ":");
688 } 706 }
689 xmlOutputBufferWriteString(buf, (const char *)cur->name); 707 xmlOutputBufferWriteString(buf, (const char *)cur->name);
690 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { 708 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
691 value = xmlNodeListGetString(doc, cur->children, 0); 709 value = xmlNodeListGetString(doc, cur->children, 0);
692 if (value) { 710 if (value) {
693 xmlOutputBufferWriteString(buf, "="); 711 xmlOutputBufferWriteString(buf, "=");
694 if ((cur->ns == NULL) && (cur->parent != NULL) && 712 if ((cur->ns == NULL) && (cur->parent != NULL) &&
695 (cur->parent->ns == NULL) && 713 (cur->parent->ns == NULL) &&
696 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || 714 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
697 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || 715 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
698 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || 716 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
699 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && 717 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
700 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { 718 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
701 xmlChar *escaped;
702 xmlChar *tmp = value; 719 xmlChar *tmp = value;
720 /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
721 xmlBufCCat(buf->buffer, "\"");
703 722
704 while (IS_BLANK_CH(*tmp)) tmp++; 723 while (IS_BLANK_CH(*tmp)) tmp++;
705 724
706 » » escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); 725 » » /* URI Escape everything, except server side includes. */
707 » » if (escaped != NULL) { 726 » » for ( ; ; ) {
708 » » xmlBufferWriteQuotedString(buf->buffer, escaped); 727 » » xmlChar *escaped;
709 » » xmlFree(escaped); 728 » » xmlChar endChar;
710 » » } else { 729 » » xmlChar *end = NULL;
711 » » xmlBufferWriteQuotedString(buf->buffer, value); 730 » » xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
731 » » if (start != NULL) {
732 » » » end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
733 » » » if (end != NULL) {
734 » » » *start = '\0';
735 » » » }
736 » » }
737
738 » » /* Escape the whole string, or until start (set to '\0'). */
739 » » escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
740 » » if (escaped != NULL) {
741 » » xmlBufCat(buf->buffer, escaped);
742 » » xmlFree(escaped);
743 » » } else {
744 » » xmlBufCat(buf->buffer, tmp);
745 » » }
746
747 » » if (end == NULL) { /* Everything has been written. */
748 » » » break;
749 » » }
750
751 » » /* Do not escape anything within server side includes. */
752 » » *start = '<'; /* Restore the first character of "<!--". */
753 » » end += 3; /* strlen("-->") */
754 » » endChar = *end;
755 » » *end = '\0';
756 » » xmlBufCat(buf->buffer, start);
757 » » *end = endChar;
758 » » tmp = end;
712 } 759 }
760
761 xmlBufCCat(buf->buffer, "\"");
713 } else { 762 } else {
714 » » xmlBufferWriteQuotedString(buf->buffer, value); 763 » » xmlBufWriteQuotedString(buf->buffer, value);
715 } 764 }
716 xmlFree(value); 765 xmlFree(value);
717 } else { 766 } else {
718 xmlOutputBufferWriteString(buf, "=\"\""); 767 xmlOutputBufferWriteString(buf, "=\"\"");
719 } 768 }
720 } 769 }
721 } 770 }
722 771
723 /** 772 /**
724 * htmlAttrListDumpOutput: 773 * htmlAttrListDumpOutput:
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
1054 if (enc != cur->charset) { 1103 if (enc != cur->charset) {
1055 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 1104 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1056 /* 1105 /*
1057 * Not supported yet 1106 * Not supported yet
1058 */ 1107 */
1059 return(-1); 1108 return(-1);
1060 } 1109 }
1061 1110
1062 handler = xmlFindCharEncodingHandler(encoding); 1111 handler = xmlFindCharEncodingHandler(encoding);
1063 if (handler == NULL) 1112 if (handler == NULL)
1064 » » return(-1); 1113 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1065 } else { 1114 } else {
1066 handler = xmlFindCharEncodingHandler(encoding); 1115 handler = xmlFindCharEncodingHandler(encoding);
1067 } 1116 }
1068 } 1117 }
1069 1118
1070 /* 1119 /*
1071 * Fallback to HTML or ASCII when the encoding is unspecified 1120 * Fallback to HTML or ASCII when the encoding is unspecified
1072 */ 1121 */
1073 if (handler == NULL) 1122 if (handler == NULL)
1074 handler = xmlFindCharEncodingHandler("HTML"); 1123 handler = xmlFindCharEncodingHandler("HTML");
(...skipping 19 matching lines...) Expand all
1094 */ 1143 */
1095 int 1144 int
1096 htmlSaveFile(const char *filename, xmlDocPtr cur) { 1145 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1097 xmlOutputBufferPtr buf; 1146 xmlOutputBufferPtr buf;
1098 xmlCharEncodingHandlerPtr handler = NULL; 1147 xmlCharEncodingHandlerPtr handler = NULL;
1099 const char *encoding; 1148 const char *encoding;
1100 int ret; 1149 int ret;
1101 1150
1102 if ((cur == NULL) || (filename == NULL)) 1151 if ((cur == NULL) || (filename == NULL))
1103 return(-1); 1152 return(-1);
1104 1153
1105 xmlInitParser(); 1154 xmlInitParser();
1106 1155
1107 encoding = (const char *) htmlGetMetaEncoding(cur); 1156 encoding = (const char *) htmlGetMetaEncoding(cur);
1108 1157
1109 if (encoding != NULL) { 1158 if (encoding != NULL) {
1110 xmlCharEncoding enc; 1159 xmlCharEncoding enc;
1111 1160
1112 enc = xmlParseCharEncoding(encoding); 1161 enc = xmlParseCharEncoding(encoding);
1113 if (enc != cur->charset) { 1162 if (enc != cur->charset) {
1114 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 1163 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1115 /* 1164 /*
1116 * Not supported yet 1165 * Not supported yet
1117 */ 1166 */
1118 return(-1); 1167 return(-1);
1119 } 1168 }
1120 1169
1121 handler = xmlFindCharEncodingHandler(encoding); 1170 handler = xmlFindCharEncodingHandler(encoding);
1122 if (handler == NULL) 1171 if (handler == NULL)
1123 » » return(-1); 1172 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1124 } 1173 }
1125 } 1174 }
1126 1175
1127 /* 1176 /*
1128 * Fallback to HTML or ASCII when the encoding is unspecified 1177 * Fallback to HTML or ASCII when the encoding is unspecified
1129 */ 1178 */
1130 if (handler == NULL) 1179 if (handler == NULL)
1131 handler = xmlFindCharEncodingHandler("HTML"); 1180 handler = xmlFindCharEncodingHandler("HTML");
1132 if (handler == NULL) 1181 if (handler == NULL)
1133 handler = xmlFindCharEncodingHandler("ascii"); 1182 handler = xmlFindCharEncodingHandler("ascii");
1134 1183
1135 /* 1184 /*
1136 * save the content to a temp buffer. 1185 * save the content to a temp buffer.
1137 */ 1186 */
1138 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); 1187 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1139 if (buf == NULL) return(0); 1188 if (buf == NULL) return(0);
1140 1189
1141 htmlDocContentDumpOutput(buf, cur, NULL); 1190 htmlDocContentDumpOutput(buf, cur, NULL);
1142 1191
1143 ret = xmlOutputBufferClose(buf); 1192 ret = xmlOutputBufferClose(buf);
1144 return(ret); 1193 return(ret);
1145 } 1194 }
1146 1195
1147 /** 1196 /**
1148 * htmlSaveFileFormat: 1197 * htmlSaveFileFormat:
1149 * @filename: the filename 1198 * @filename: the filename
1150 * @cur: the document 1199 * @cur: the document
1151 * @format: should formatting spaces been added 1200 * @format: should formatting spaces been added
1152 * @encoding: the document encoding 1201 * @encoding: the document encoding
1153 * 1202 *
1154 * Dump an HTML document to a file using a given encoding. 1203 * Dump an HTML document to a file using a given encoding.
1155 * 1204 *
1156 * returns: the number of byte written or -1 in case of failure. 1205 * returns: the number of byte written or -1 in case of failure.
1157 */ 1206 */
1158 int 1207 int
1159 htmlSaveFileFormat(const char *filename, xmlDocPtr cur, 1208 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1160 const char *encoding, int format) { 1209 const char *encoding, int format) {
1161 xmlOutputBufferPtr buf; 1210 xmlOutputBufferPtr buf;
1162 xmlCharEncodingHandlerPtr handler = NULL; 1211 xmlCharEncodingHandlerPtr handler = NULL;
1163 int ret; 1212 int ret;
1164 1213
1165 if ((cur == NULL) || (filename == NULL)) 1214 if ((cur == NULL) || (filename == NULL))
1166 return(-1); 1215 return(-1);
1167 1216
1168 xmlInitParser(); 1217 xmlInitParser();
1169 1218
1170 if (encoding != NULL) { 1219 if (encoding != NULL) {
1171 xmlCharEncoding enc; 1220 xmlCharEncoding enc;
1172 1221
1173 enc = xmlParseCharEncoding(encoding); 1222 enc = xmlParseCharEncoding(encoding);
1174 if (enc != cur->charset) { 1223 if (enc != cur->charset) {
1175 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 1224 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1176 /* 1225 /*
1177 * Not supported yet 1226 * Not supported yet
1178 */ 1227 */
1179 return(-1); 1228 return(-1);
1180 } 1229 }
1181 1230
1182 handler = xmlFindCharEncodingHandler(encoding); 1231 handler = xmlFindCharEncodingHandler(encoding);
1183 if (handler == NULL) 1232 if (handler == NULL)
1184 » » return(-1); 1233 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1185 } 1234 }
1186 htmlSetMetaEncoding(cur, (const xmlChar *) encoding); 1235 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1187 } else { 1236 } else {
1188 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); 1237 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1189 } 1238 }
1190 1239
1191 /* 1240 /*
1192 * Fallback to HTML or ASCII when the encoding is unspecified 1241 * Fallback to HTML or ASCII when the encoding is unspecified
1193 */ 1242 */
1194 if (handler == NULL) 1243 if (handler == NULL)
1195 handler = xmlFindCharEncodingHandler("HTML"); 1244 handler = xmlFindCharEncodingHandler("HTML");
1196 if (handler == NULL) 1245 if (handler == NULL)
1197 handler = xmlFindCharEncodingHandler("ascii"); 1246 handler = xmlFindCharEncodingHandler("ascii");
1198 1247
1199 /* 1248 /*
1200 * save the content to a temp buffer. 1249 * save the content to a temp buffer.
1201 */ 1250 */
1202 buf = xmlOutputBufferCreateFilename(filename, handler, 0); 1251 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1203 if (buf == NULL) return(0); 1252 if (buf == NULL) return(0);
1204 1253
1205 htmlDocContentDumpFormatOutput(buf, cur, encoding, format); 1254 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1206 1255
1207 ret = xmlOutputBufferClose(buf); 1256 ret = xmlOutputBufferClose(buf);
1208 return(ret); 1257 return(ret);
1209 } 1258 }
1210 1259
1211 /** 1260 /**
1212 * htmlSaveFileEnc: 1261 * htmlSaveFileEnc:
1213 * @filename: the filename 1262 * @filename: the filename
1214 * @cur: the document 1263 * @cur: the document
1215 * @encoding: the document encoding 1264 * @encoding: the document encoding
1216 * 1265 *
1217 * Dump an HTML document to a file using a given encoding 1266 * Dump an HTML document to a file using a given encoding
1218 * and formatting returns/spaces are added. 1267 * and formatting returns/spaces are added.
1219 * 1268 *
1220 * returns: the number of byte written or -1 in case of failure. 1269 * returns: the number of byte written or -1 in case of failure.
1221 */ 1270 */
1222 int 1271 int
1223 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { 1272 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1224 return(htmlSaveFileFormat(filename, cur, encoding, 1)); 1273 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1225 } 1274 }
1226 1275
1227 #endif /* LIBXML_OUTPUT_ENABLED */ 1276 #endif /* LIBXML_OUTPUT_ENABLED */
1228 1277
1229 #define bottom_HTMLtree 1278 #define bottom_HTMLtree
1230 #include "elfgcchack.h" 1279 #include "elfgcchack.h"
1231 #endif /* LIBXML_HTML_ENABLED */ 1280 #endif /* LIBXML_HTML_ENABLED */
OLDNEW
« no previous file with comments | « third_party/libxml/src/HTMLparser.c ('k') | third_party/libxml/src/INSTALL » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698