third_party/libxml/src/HTMLtree.c - Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28

Side by Side Diff: third_party/libxml/src/HTMLtree.c

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: no iconv Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * HTMLtree.c : implementation of access function for an HTML tree.	2 * HTMLtree.c : implementation of access function for an HTML tree.

3 *	3 *

4 * See Copyright for the status of this software.	4 * See Copyright for the status of this software.

5 *	5 *

6 * daniel@veillard.com	6 * daniel@veillard.com

7 */	7 */

8	8

9	9

10 #define IN_LIBXML	10 #define IN_LIBXML

(...skipping 12 matching lines...) Expand all Loading...
23 #include <libxml/xmlmemory.h>	23 #include <libxml/xmlmemory.h>

24 #include <libxml/HTMLparser.h>	24 #include <libxml/HTMLparser.h>

25 #include <libxml/HTMLtree.h>	25 #include <libxml/HTMLtree.h>

26 #include <libxml/entities.h>	26 #include <libxml/entities.h>

27 #include <libxml/valid.h>	27 #include <libxml/valid.h>

28 #include <libxml/xmlerror.h>	28 #include <libxml/xmlerror.h>

29 #include <libxml/parserInternals.h>	29 #include <libxml/parserInternals.h>

30 #include <libxml/globals.h>	30 #include <libxml/globals.h>

31 #include <libxml/uri.h>	31 #include <libxml/uri.h>

32	32

	33 #include "buf.h"

	34

33 /************************************************************************	35 /************************************************************************

34 * *	36 * *

35 * » » Getting/Setting encoding meta tags» » » *	37 * » » Getting/Setting encoding meta tags» » » *

36 * *	38 * *

37 ************************************************************************/	39 ************************************************************************/

38	40

39 /**	41 /**

40 * htmlGetMetaEncoding:	42 * htmlGetMetaEncoding:

41 * @doc: the document	43 * @doc: the document

42 *	44 *

43 * Encoding definition lookup in the Meta tags	45 * Encoding definition lookup in the Meta tags

44 *	46 *

45 * Returns the current encoding as flagged in the HTML source	47 * Returns the current encoding as flagged in the HTML source

46 */	48 */

47 const xmlChar *	49 const xmlChar *

48 htmlGetMetaEncoding(htmlDocPtr doc) {	50 htmlGetMetaEncoding(htmlDocPtr doc) {

49 htmlNodePtr cur;	51 htmlNodePtr cur;

50 const xmlChar *content;	52 const xmlChar *content;

51 const xmlChar *encoding;	53 const xmlChar *encoding;

52	54

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
119 attr = attr->next;	121 attr = attr->next;

120 }	122 }

121 }	123 }

122 }	124 }

123 cur = cur->next;	125 cur = cur->next;

124 }	126 }

125 return(NULL);	127 return(NULL);

126	128

127 found_content:	129 found_content:

128 encoding = xmlStrstr(content, BAD_CAST"charset=");	130 encoding = xmlStrstr(content, BAD_CAST"charset=");

129 if (encoding == NULL)	131 if (encoding == NULL)

130 encoding = xmlStrstr(content, BAD_CAST"Charset=");	132 encoding = xmlStrstr(content, BAD_CAST"Charset=");

131 if (encoding == NULL)	133 if (encoding == NULL)

132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");	134 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");

133 if (encoding != NULL) {	135 if (encoding != NULL) {

134 encoding += 8;	136 encoding += 8;

135 } else {	137 } else {

136 encoding = xmlStrstr(content, BAD_CAST"charset =");	138 encoding = xmlStrstr(content, BAD_CAST"charset =");

137 » if (encoding == NULL)	139 » if (encoding == NULL)

138 encoding = xmlStrstr(content, BAD_CAST"Charset =");	140 encoding = xmlStrstr(content, BAD_CAST"Charset =");

139 » if (encoding == NULL)	141 » if (encoding == NULL)

140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");	142 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");

141 if (encoding != NULL)	143 if (encoding != NULL)

142 encoding += 9;	144 encoding += 9;

143 }	145 }

144 if (encoding != NULL) {	146 if (encoding != NULL) {

145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;	147 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;

146 }	148 }

147 return(encoding);	149 return(encoding);

148 }	150 }

149	151

150 /**	152 /**

151 * htmlSetMetaEncoding:	153 * htmlSetMetaEncoding:

152 * @doc: the document	154 * @doc: the document

153 * @encoding: the encoding string	155 * @encoding: the encoding string

154 *	156 *

155 * Sets the current encoding in the Meta tags	157 * Sets the current encoding in the Meta tags

156 * NOTE: this will not change the document content encoding, just	158 * NOTE: this will not change the document content encoding, just

157 * the META flag associated.	159 * the META flag associated.

158 *	160 *

159 * Returns 0 in case of success and -1 in case of error	161 * Returns 0 in case of success and -1 in case of error

160 */	162 */

161 int	163 int

162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {	164 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {

163 htmlNodePtr cur, meta = NULL, head = NULL;	165 htmlNodePtr cur, meta = NULL, head = NULL;

164 const xmlChar *content = NULL;	166 const xmlChar *content = NULL;

165 char newcontent[100];	167 char newcontent[100];

166	168

	169 newcontent[0] = 0;

167	170

168 if (doc == NULL)	171 if (doc == NULL)

169 return(-1);	172 return(-1);

170	173

171 /* html isn't a real encoding it's just libxml2 way to get entities */	174 /* html isn't a real encoding it's just libxml2 way to get entities */

172 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))	175 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))

173 return(-1);	176 return(-1);

174	177

175 if (encoding != NULL) {	178 if (encoding != NULL) {

176 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",	179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
237 while (attr != NULL) {	240 while (attr != NULL) {

238 if ((attr->children != NULL) &&	241 if ((attr->children != NULL) &&

239 (attr->children->type == XML_TEXT_NODE) &&	242 (attr->children->type == XML_TEXT_NODE) &&

240 (attr->children->next == NULL)) {	243 (attr->children->next == NULL)) {

241 value = attr->children->content;	244 value = attr->children->content;

242 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))	245 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))

243 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))	246 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))

244 http = 1;	247 http = 1;

245 else	248 else

246 {	249 {

247 if ((value != NULL) &&	250 if ((value != NULL) &&

248 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))	251 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))

249 content = value;	252 content = value;

250 }	253 }

251 if ((http != 0) && (content != NULL))	254 if ((http != 0) && (content != NULL))

252 break;	255 break;

253 }	256 }

254 attr = attr->next;	257 attr = attr->next;

255 }	258 }

256 if ((http != 0) && (content != NULL)) {	259 if ((http != 0) && (content != NULL)) {

257 meta = cur;	260 meta = cur;

(...skipping 13 matching lines...) Expand all Loading...
271	274

272 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);	275 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);

273 if (head->children == NULL)	276 if (head->children == NULL)

274 xmlAddChild(head, meta);	277 xmlAddChild(head, meta);

275 else	278 else

276 xmlAddPrevSibling(head->children, meta);	279 xmlAddPrevSibling(head->children, meta);

277 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");	280 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");

278 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);	281 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);

279 }	282 }

280 } else {	283 } else {

	284 /* remove the meta tag if NULL is passed */

	285 if (encoding == NULL) {

	286 xmlUnlinkNode(meta);

	287 xmlFreeNode(meta);

	288 }

281 /* change the document only if there is a real encoding change */	289 /* change the document only if there is a real encoding change */

282 if (xmlStrcasestr(content, encoding) == NULL) {	290 else if (xmlStrcasestr(content, encoding) == NULL) {

283 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);	291 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);

284 }	292 }

285 }	293 }

286	294

287	295

288 return(0);	296 return(0);

289 }	297 }

290	298

291 /**	299 /**

292 * booleanHTMLAttrs:	300 * booleanHTMLAttrs:

293 *	301 *

294 * These are the HTML attributes which will be output	302 * These are the HTML attributes which will be output

295 * in minimized form, i.e. <option selected="selected"> will be	303 * in minimized form, i.e. <option selected="selected"> will be

296 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"	304 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"

297 *	305 *

298 */	306 */

299 static const char* htmlBooleanAttrs[] = {	307 static const char* htmlBooleanAttrs[] = {

300 "checked", "compact", "declare", "defer", "disabled", "ismap",	308 "checked", "compact", "declare", "defer", "disabled", "ismap",

301 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",	309 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",

302 "selected", NULL	310 "selected", NULL

303 };	311 };

304	312

305	313

306 /**	314 /**

307 * htmlIsBooleanAttr:	315 * htmlIsBooleanAttr:

308 * @name: the name of the attribute to check	316 * @name: the name of the attribute to check

309 *	317 *

310 * Determine if a given attribute is a boolean attribute.	318 * Determine if a given attribute is a boolean attribute.

311 *	319 *

312 * returns: false if the attribute is not boolean, true otherwise.	320 * returns: false if the attribute is not boolean, true otherwise.

313 */	321 */

314 int	322 int

315 htmlIsBooleanAttr(const xmlChar *name)	323 htmlIsBooleanAttr(const xmlChar *name)

316 {	324 {

317 int i = 0;	325 int i = 0;

318	326

319 while (htmlBooleanAttrs[i] != NULL) {	327 while (htmlBooleanAttrs[i] != NULL) {

320 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)	328 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)

321 return 1;	329 return 1;

322 i++;	330 i++;

323 }	331 }

324 return 0;	332 return 0;

325 }	333 }

326	334

327 #ifdef LIBXML_OUTPUT_ENABLED	335 #ifdef LIBXML_OUTPUT_ENABLED

328 /*	336 /*

329 * private routine exported from xmlIO.c	337 * private routine exported from xmlIO.c

330 */	338 */

331 xmlOutputBufferPtr	339 xmlOutputBufferPtr

332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);	340 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);

333 /************************************************************************	341 /************************************************************************

334 * *	342 * *

335 * » » » Output error handlers» » » » *	343 * » » » Output error handlers» » » » *

336 * *	344 * *

337 ************************************************************************/	345 ************************************************************************/

338 /**	346 /**

339 * htmlSaveErrMemory:	347 * htmlSaveErrMemory:

340 * @extra: extra informations	348 * @extra: extra informations

341 *	349 *

342 * Handle an out of memory condition	350 * Handle an out of memory condition

343 */	351 */

344 static void	352 static void

345 htmlSaveErrMemory(const char *extra)	353 htmlSaveErrMemory(const char *extra)

(...skipping 28 matching lines...) Expand all Loading...
374 msg = "HTML has no DOCTYPE\n";	382 msg = "HTML has no DOCTYPE\n";

375 break;	383 break;

376 default:	384 default:

377 msg = "unexpected error number\n";	385 msg = "unexpected error number\n";

378 }	386 }

379 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);	387 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);

380 }	388 }

381	389

382 /************************************************************************	390 /************************************************************************

383 * *	391 * *

384 * » » Dumping HTML tree content to a simple buffer» » *	392 * » » Dumping HTML tree content to a simple buffer» » *

385 * *	393 * *

386 ************************************************************************/	394 ************************************************************************/

387	395

388 static int

389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,

390 int format);

391

392 /**	396 /**

393 * htmlNodeDumpFormat:	397 * htmlBufNodeDumpFormat:

394 * @buf: the HTML buffer output	398 * @buf: the xmlBufPtr output

395 * @doc: the document	399 * @doc: the document

396 * @cur: the current node	400 * @cur: the current node

397 * @format: should formatting spaces been added	401 * @format: should formatting spaces been added

398 *	402 *

399 * Dump an HTML node, recursive behaviour,children are printed too.	403 * Dump an HTML node, recursive behaviour,children are printed too.

400 *	404 *

401 * Returns the number of byte written or -1 in case of error	405 * Returns the number of byte written or -1 in case of error

402 */	406 */

403 static int	407 static size_t

404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,	408 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,

405 int format) {	409 int format) {

406 unsigned int use;	410 size_t use;

407 int ret;	411 int ret;

408 xmlOutputBufferPtr outbuf;	412 xmlOutputBufferPtr outbuf;

409	413

410 if (cur == NULL) {	414 if (cur == NULL) {

411 return (-1);	415 return (-1);

412 }	416 }

413 if (buf == NULL) {	417 if (buf == NULL) {

414 return (-1);	418 return (-1);

415 }	419 }

416 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));	420 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));

417 if (outbuf == NULL) {	421 if (outbuf == NULL) {

418 htmlSaveErrMemory("allocating HTML output buffer");	422 htmlSaveErrMemory("allocating HTML output buffer");

419 return (-1);	423 return (-1);

420 }	424 }

421 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));	425 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));

422 outbuf->buffer = buf;	426 outbuf->buffer = buf;

423 outbuf->encoder = NULL;	427 outbuf->encoder = NULL;

424 outbuf->writecallback = NULL;	428 outbuf->writecallback = NULL;

425 outbuf->closecallback = NULL;	429 outbuf->closecallback = NULL;

426 outbuf->context = NULL;	430 outbuf->context = NULL;

427 outbuf->written = 0;	431 outbuf->written = 0;

428	432

429 use = buf->use;	433 use = xmlBufUse(buf);

430 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);	434 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);

431 xmlFree(outbuf);	435 xmlFree(outbuf);

432 ret = buf->use - use;	436 ret = xmlBufUse(buf) - use;

433 return (ret);	437 return (ret);

434 }	438 }

435	439

436 /**	440 /**

437 * htmlNodeDump:	441 * htmlNodeDump:

438 * @buf: the HTML buffer output	442 * @buf: the HTML buffer output

439 * @doc: the document	443 * @doc: the document

440 * @cur: the current node	444 * @cur: the current node

441 *	445 *

442 * Dump an HTML node, recursive behaviour,children are printed too,	446 * Dump an HTML node, recursive behaviour,children are printed too,

443 * and formatting returns are added.	447 * and formatting returns are added.

444 *	448 *

445 * Returns the number of byte written or -1 in case of error	449 * Returns the number of byte written or -1 in case of error

446 */	450 */

447 int	451 int

448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {	452 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {

	453 xmlBufPtr buffer;

	454 size_t ret;

	455

	456 if ((buf == NULL) || (cur == NULL))

	457 return(-1);

	458

449 xmlInitParser();	459 xmlInitParser();

	460 buffer = xmlBufFromBuffer(buf);

	461 if (buffer == NULL)

	462 return(-1);

450	463

451 return(htmlNodeDumpFormat(buf, doc, cur, 1));	464 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);

	465

	466 xmlBufBackToBuffer(buffer);

	467

	468 if (ret > INT_MAX)

	469 return(-1);

	470 return((int) ret);

452 }	471 }

453	472

454 /**	473 /**

455 * htmlNodeDumpFileFormat:	474 * htmlNodeDumpFileFormat:

456 * @out: the FILE pointer	475 * @out: the FILE pointer

457 * @doc: the document	476 * @doc: the document

458 * @cur: the current node	477 * @cur: the current node

459 * @encoding: the document encoding	478 * @encoding: the document encoding

460 * @format: should formatting spaces been added	479 * @format: should formatting spaces been added

461 *	480 *

(...skipping 12 matching lines...) Expand all Loading...
474	493

475 xmlInitParser();	494 xmlInitParser();

476	495

477 if (encoding != NULL) {	496 if (encoding != NULL) {

478 xmlCharEncoding enc;	497 xmlCharEncoding enc;

479	498

480 enc = xmlParseCharEncoding(encoding);	499 enc = xmlParseCharEncoding(encoding);

481 if (enc != XML_CHAR_ENCODING_UTF8) {	500 if (enc != XML_CHAR_ENCODING_UTF8) {

482 handler = xmlFindCharEncodingHandler(encoding);	501 handler = xmlFindCharEncodingHandler(encoding);

483 if (handler == NULL)	502 if (handler == NULL)

484 » » return(-1);	503 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);

485 }	504 }

486 }	505 }

487	506

488 /*	507 /*

489 * Fallback to HTML or ASCII when the encoding is unspecified	508 * Fallback to HTML or ASCII when the encoding is unspecified

490 */	509 */

491 if (handler == NULL)	510 if (handler == NULL)

492 handler = xmlFindCharEncodingHandler("HTML");	511 handler = xmlFindCharEncodingHandler("HTML");

493 if (handler == NULL)	512 if (handler == NULL)

494 handler = xmlFindCharEncodingHandler("ascii");	513 handler = xmlFindCharEncodingHandler("ascii");

495	514

496 /*	515 /*

497 * save the content to a temp buffer.	516 * save the content to a temp buffer.

498 */	517 */

499 buf = xmlOutputBufferCreateFile(out, handler);	518 buf = xmlOutputBufferCreateFile(out, handler);

500 if (buf == NULL) return(0);	519 if (buf == NULL) return(0);

501	520

502 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);	521 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);

503	522

504 ret = xmlOutputBufferClose(buf);	523 ret = xmlOutputBufferClose(buf);

505 return(ret);	524 return(ret);

506 }	525 }

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
555 if (cur->charset != XML_CHAR_ENCODING_UTF8) {	574 if (cur->charset != XML_CHAR_ENCODING_UTF8) {

556 /*	575 /*

557 * Not supported yet	576 * Not supported yet

558 */	577 */

559 *mem = NULL;	578 *mem = NULL;

560 *size = 0;	579 *size = 0;

561 return;	580 return;

562 }	581 }

563	582

564 handler = xmlFindCharEncodingHandler(encoding);	583 handler = xmlFindCharEncodingHandler(encoding);

565 » if (handler == NULL) {	584 » if (handler == NULL)

566 » » *mem = NULL;	585 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);

567 » » *size = 0;	586

568 » » return;

569 » }

570 } else {	587 } else {

571 handler = xmlFindCharEncodingHandler(encoding);	588 handler = xmlFindCharEncodingHandler(encoding);

572 }	589 }

573 }	590 }

574	591

575 /*	592 /*

576 * Fallback to HTML or ASCII when the encoding is unspecified	593 * Fallback to HTML or ASCII when the encoding is unspecified

577 */	594 */

578 if (handler == NULL)	595 if (handler == NULL)

579 handler = xmlFindCharEncodingHandler("HTML");	596 handler = xmlFindCharEncodingHandler("HTML");

580 if (handler == NULL)	597 if (handler == NULL)

581 handler = xmlFindCharEncodingHandler("ascii");	598 handler = xmlFindCharEncodingHandler("ascii");

582	599

583 buf = xmlAllocOutputBufferInternal(handler);	600 buf = xmlAllocOutputBufferInternal(handler);

584 if (buf == NULL) {	601 if (buf == NULL) {

585 *mem = NULL;	602 *mem = NULL;

586 *size = 0;	603 *size = 0;

587 return;	604 return;

588 }	605 }

589	606

590 » htmlDocContentDumpFormatOutput(buf, cur, NULL, format);	607 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);

591	608

592 xmlOutputBufferFlush(buf);	609 xmlOutputBufferFlush(buf);

593 if (buf->conv != NULL) {	610 if (buf->conv != NULL) {

594 » *size = buf->conv->use;	611 » *size = xmlBufUse(buf->conv);

595 » *mem = xmlStrndup(buf->conv->content, *size);	612 » *mem = xmlStrndup(xmlBufContent(buf->conv), *size);

596 } else {	613 } else {

597 » *size = buf->buffer->use;	614 » *size = xmlBufUse(buf->buffer);

598 » *mem = xmlStrndup(buf->buffer->content, *size);	615 » *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);

599 }	616 }

600 (void)xmlOutputBufferClose(buf);	617 (void)xmlOutputBufferClose(buf);

601 }	618 }

602	619

603 /**	620 /**

604 * htmlDocDumpMemory:	621 * htmlDocDumpMemory:

605 * @cur: the document	622 * @cur: the document

606 * @mem: OUT: the memory pointer	623 * @mem: OUT: the memory pointer

607 * @size: OUT: the memory length	624 * @size: OUT: the memory length

608 *	625 *

609 * Dump an HTML document in memory and return the xmlChar * and it's size.	626 * Dump an HTML document in memory and return the xmlChar * and it's size.

610 * It's up to the caller to free the memory.	627 * It's up to the caller to free the memory.

611 */	628 */

612 void	629 void

613 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {	630 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {

614 htmlDocDumpMemoryFormat(cur, mem, size, 1);	631 htmlDocDumpMemoryFormat(cur, mem, size, 1);

615 }	632 }

616	633

617	634

618 /************************************************************************	635 /************************************************************************

619 * *	636 * *

620 * » » Dumping HTML tree content to an I/O output buffer» *	637 * » » Dumping HTML tree content to an I/O output buffer» *

621 * *	638 * *

622 ************************************************************************/	639 ************************************************************************/

623	640

624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);	641 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);

625	642

626 /**	643 /**

627 * htmlDtdDumpOutput:	644 * htmlDtdDumpOutput:

628 * @buf: the HTML buffer output	645 * @buf: the HTML buffer output

629 * @doc: the document	646 * @doc: the document

630 * @encoding: the encoding string	647 * @encoding: the encoding string

631 *	648 *

632 * TODO: check whether encoding is needed	649 * TODO: check whether encoding is needed

633 *	650 *

634 * Dump the HTML document DTD, if any.	651 * Dump the HTML document DTD, if any.

635 */	652 */

636 static void	653 static void

637 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,	654 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,

638 const char *encoding ATTRIBUTE_UNUSED) {	655 const char *encoding ATTRIBUTE_UNUSED) {

639 xmlDtdPtr cur = doc->intSubset;	656 xmlDtdPtr cur = doc->intSubset;

640	657

641 if (cur == NULL) {	658 if (cur == NULL) {

642 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);	659 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);

643 return;	660 return;

644 }	661 }

645 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");	662 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");

646 xmlOutputBufferWriteString(buf, (const char *)cur->name);	663 xmlOutputBufferWriteString(buf, (const char *)cur->name);

647 if (cur->ExternalID != NULL) {	664 if (cur->ExternalID != NULL) {

648 xmlOutputBufferWriteString(buf, " PUBLIC ");	665 xmlOutputBufferWriteString(buf, " PUBLIC ");

649 » xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);	666 » xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);

650 if (cur->SystemID != NULL) {	667 if (cur->SystemID != NULL) {

651 xmlOutputBufferWriteString(buf, " ");	668 xmlOutputBufferWriteString(buf, " ");

652 » xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);	669 » xmlBufWriteQuotedString(buf->buffer, cur->SystemID);

653 » }	670 » }

654 } else if (cur->SystemID != NULL) {	671 } else if (cur->SystemID != NULL) {

655 xmlOutputBufferWriteString(buf, " SYSTEM ");	672 xmlOutputBufferWriteString(buf, " SYSTEM ");

656 » xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);	673 » xmlBufWriteQuotedString(buf->buffer, cur->SystemID);

657 }	674 }

658 xmlOutputBufferWriteString(buf, ">\n");	675 xmlOutputBufferWriteString(buf, ">\n");

659 }	676 }

660	677

661 /**	678 /**

662 * htmlAttrDumpOutput:	679 * htmlAttrDumpOutput:

663 * @buf: the HTML buffer output	680 * @buf: the HTML buffer output

664 * @doc: the document	681 * @doc: the document

665 * @cur: the attribute pointer	682 * @cur: the attribute pointer

666 * @encoding: the encoding string	683 * @encoding: the encoding string

667 *	684 *

668 * Dump an HTML attribute	685 * Dump an HTML attribute

669 */	686 */

670 static void	687 static void

671 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,	688 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,

672 const char *encoding ATTRIBUTE_UNUSED) {	689 const char *encoding ATTRIBUTE_UNUSED) {

673 xmlChar *value;	690 xmlChar *value;

674	691

675 /*	692 /*

676 * TODO: The html output method should not escape a & character	693 * The html output method should not escape a & character

677 * occurring in an attribute value immediately followed by	694 * occurring in an attribute value immediately followed by

678 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).	695 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).

	696 * This is implemented in xmlEncodeEntitiesReentrant

679 */	697 */

680	698

681 if (cur == NULL) {	699 if (cur == NULL) {

682 return;	700 return;

683 }	701 }

684 xmlOutputBufferWriteString(buf, " ");	702 xmlOutputBufferWriteString(buf, " ");

685 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {	703 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {

686 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);	704 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);

687 xmlOutputBufferWriteString(buf, ":");	705 xmlOutputBufferWriteString(buf, ":");

688 }	706 }

689 xmlOutputBufferWriteString(buf, (const char *)cur->name);	707 xmlOutputBufferWriteString(buf, (const char *)cur->name);

690 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {	708 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {

691 value = xmlNodeListGetString(doc, cur->children, 0);	709 value = xmlNodeListGetString(doc, cur->children, 0);

692 if (value) {	710 if (value) {

693 xmlOutputBufferWriteString(buf, "=");	711 xmlOutputBufferWriteString(buf, "=");

694 if ((cur->ns == NULL) && (cur->parent != NULL) &&	712 if ((cur->ns == NULL) && (cur->parent != NULL) &&

695 (cur->parent->ns == NULL) &&	713 (cur->parent->ns == NULL) &&

696 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||	714 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||

697 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||	715 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||

698 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||	716 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||

699 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&	717 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&

700 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {	718 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {

701 xmlChar *escaped;

702 xmlChar *tmp = value;	719 xmlChar *tmp = value;

	720 /* xmlURIEscapeStr() escapes '"' so it can be safely used. */

	721 xmlBufCCat(buf->buffer, "\"");

703	722

704 while (IS_BLANK_CH(*tmp)) tmp++;	723 while (IS_BLANK_CH(*tmp)) tmp++;

705	724

706 » » escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");	725 » » /* URI Escape everything, except server side includes. */

707 » » if (escaped != NULL) {	726 » » for ( ; ; ) {

708 » » xmlBufferWriteQuotedString(buf->buffer, escaped);	727 » » xmlChar *escaped;

709 » » xmlFree(escaped);	728 » » xmlChar endChar;

710 » » } else {	729 » » xmlChar *end = NULL;

711 » » xmlBufferWriteQuotedString(buf->buffer, value);	730 » » xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");

	731 » » if (start != NULL) {

	732 » » » end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");

	733 » » » if (end != NULL) {

	734 » » » *start = '\0';

	735 » » » }

	736 » » }

	737

	738 » » /* Escape the whole string, or until start (set to '\0'). */

	739 » » escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");

	740 » » if (escaped != NULL) {

	741 » » xmlBufCat(buf->buffer, escaped);

	742 » » xmlFree(escaped);

	743 » » } else {

	744 » » xmlBufCat(buf->buffer, tmp);

	745 » » }

	746

	747 » » if (end == NULL) { /* Everything has been written. */

	748 » » » break;

	749 » » }

	750

	751 » » /* Do not escape anything within server side includes. */

	752 » » *start = '<'; /* Restore the first character of "<!--". */

	753 » » end += 3; /* strlen("-->") */

	754 » » endChar = *end;

	755 » » *end = '\0';

	756 » » xmlBufCat(buf->buffer, start);

	757 » » *end = endChar;

	758 » » tmp = end;

712 }	759 }

	760

	761 xmlBufCCat(buf->buffer, "\"");

713 } else {	762 } else {

714 » » xmlBufferWriteQuotedString(buf->buffer, value);	763 » » xmlBufWriteQuotedString(buf->buffer, value);

715 }	764 }

716 xmlFree(value);	765 xmlFree(value);

717 } else {	766 } else {

718 xmlOutputBufferWriteString(buf, "=\"\"");	767 xmlOutputBufferWriteString(buf, "=\"\"");

719 }	768 }

720 }	769 }

721 }	770 }

722	771

723 /**	772 /**

724 * htmlAttrListDumpOutput:	773 * htmlAttrListDumpOutput:

(...skipping 329 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1054 if (enc != cur->charset) {	1103 if (enc != cur->charset) {

1055 if (cur->charset != XML_CHAR_ENCODING_UTF8) {	1104 if (cur->charset != XML_CHAR_ENCODING_UTF8) {

1056 /*	1105 /*

1057 * Not supported yet	1106 * Not supported yet

1058 */	1107 */

1059 return(-1);	1108 return(-1);

1060 }	1109 }

1061	1110

1062 handler = xmlFindCharEncodingHandler(encoding);	1111 handler = xmlFindCharEncodingHandler(encoding);

1063 if (handler == NULL)	1112 if (handler == NULL)

1064 » » return(-1);	1113 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);

1065 } else {	1114 } else {

1066 handler = xmlFindCharEncodingHandler(encoding);	1115 handler = xmlFindCharEncodingHandler(encoding);

1067 }	1116 }

1068 }	1117 }

1069	1118

1070 /*	1119 /*

1071 * Fallback to HTML or ASCII when the encoding is unspecified	1120 * Fallback to HTML or ASCII when the encoding is unspecified

1072 */	1121 */

1073 if (handler == NULL)	1122 if (handler == NULL)

1074 handler = xmlFindCharEncodingHandler("HTML");	1123 handler = xmlFindCharEncodingHandler("HTML");

(...skipping 19 matching lines...) Expand all Loading...
1094 */	1143 */

1095 int	1144 int

1096 htmlSaveFile(const char *filename, xmlDocPtr cur) {	1145 htmlSaveFile(const char *filename, xmlDocPtr cur) {

1097 xmlOutputBufferPtr buf;	1146 xmlOutputBufferPtr buf;

1098 xmlCharEncodingHandlerPtr handler = NULL;	1147 xmlCharEncodingHandlerPtr handler = NULL;

1099 const char *encoding;	1148 const char *encoding;

1100 int ret;	1149 int ret;

1101	1150

1102 if ((cur == NULL) || (filename == NULL))	1151 if ((cur == NULL) || (filename == NULL))

1103 return(-1);	1152 return(-1);

1104	1153

1105 xmlInitParser();	1154 xmlInitParser();

1106	1155

1107 encoding = (const char *) htmlGetMetaEncoding(cur);	1156 encoding = (const char *) htmlGetMetaEncoding(cur);

1108	1157

1109 if (encoding != NULL) {	1158 if (encoding != NULL) {

1110 xmlCharEncoding enc;	1159 xmlCharEncoding enc;

1111	1160

1112 enc = xmlParseCharEncoding(encoding);	1161 enc = xmlParseCharEncoding(encoding);

1113 if (enc != cur->charset) {	1162 if (enc != cur->charset) {

1114 if (cur->charset != XML_CHAR_ENCODING_UTF8) {	1163 if (cur->charset != XML_CHAR_ENCODING_UTF8) {

1115 /*	1164 /*

1116 * Not supported yet	1165 * Not supported yet

1117 */	1166 */

1118 return(-1);	1167 return(-1);

1119 }	1168 }

1120	1169

1121 handler = xmlFindCharEncodingHandler(encoding);	1170 handler = xmlFindCharEncodingHandler(encoding);

1122 if (handler == NULL)	1171 if (handler == NULL)

1123 » » return(-1);	1172 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);

1124 }	1173 }

1125 }	1174 }

1126	1175

1127 /*	1176 /*

1128 * Fallback to HTML or ASCII when the encoding is unspecified	1177 * Fallback to HTML or ASCII when the encoding is unspecified

1129 */	1178 */

1130 if (handler == NULL)	1179 if (handler == NULL)

1131 handler = xmlFindCharEncodingHandler("HTML");	1180 handler = xmlFindCharEncodingHandler("HTML");

1132 if (handler == NULL)	1181 if (handler == NULL)

1133 handler = xmlFindCharEncodingHandler("ascii");	1182 handler = xmlFindCharEncodingHandler("ascii");

1134	1183

1135 /*	1184 /*

1136 * save the content to a temp buffer.	1185 * save the content to a temp buffer.

1137 */	1186 */

1138 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);	1187 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);

1139 if (buf == NULL) return(0);	1188 if (buf == NULL) return(0);

1140	1189

1141 htmlDocContentDumpOutput(buf, cur, NULL);	1190 htmlDocContentDumpOutput(buf, cur, NULL);

1142	1191

1143 ret = xmlOutputBufferClose(buf);	1192 ret = xmlOutputBufferClose(buf);

1144 return(ret);	1193 return(ret);

1145 }	1194 }

1146	1195

1147 /**	1196 /**

1148 * htmlSaveFileFormat:	1197 * htmlSaveFileFormat:

1149 * @filename: the filename	1198 * @filename: the filename

1150 * @cur: the document	1199 * @cur: the document

1151 * @format: should formatting spaces be added	1200 * @format: should formatting spaces be added

1152 * @encoding: the document encoding	1201 * @encoding: the document encoding

1153 *	1202 *

1154 * Dump an HTML document to a file using a given encoding.	1203 * Dump an HTML document to a file using a given encoding.

1155 *	1204 *

1156 * returns: the number of bytes written or -1 in case of failure.	1205 * returns: the number of bytes written or -1 in case of failure.

1157 */	1206 */

1158 int	1207 int

1159 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,	1208 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,

1160 const char *encoding, int format) {	1209 const char *encoding, int format) {

1161 xmlOutputBufferPtr buf;	1210 xmlOutputBufferPtr buf;

1162 xmlCharEncodingHandlerPtr handler = NULL;	1211 xmlCharEncodingHandlerPtr handler = NULL;

1163 int ret;	1212 int ret;

1164	1213

1165 if ((cur == NULL) || (filename == NULL))	1214 if ((cur == NULL) || (filename == NULL))

1166 return(-1);	1215 return(-1);

1167	1216

1168 xmlInitParser();	1217 xmlInitParser();

1169	1218

1170 if (encoding != NULL) {	1219 if (encoding != NULL) {

1171 xmlCharEncoding enc;	1220 xmlCharEncoding enc;

1172	1221

1173 enc = xmlParseCharEncoding(encoding);	1222 enc = xmlParseCharEncoding(encoding);

1174 if (enc != cur->charset) {	1223 if (enc != cur->charset) {

1175 if (cur->charset != XML_CHAR_ENCODING_UTF8) {	1224 if (cur->charset != XML_CHAR_ENCODING_UTF8) {

1176 /*	1225 /*

1177 * Not supported yet	1226 * Not supported yet

1178 */	1227 */

1179 return(-1);	1228 return(-1);

1180 }	1229 }

1181	1230

1182 handler = xmlFindCharEncodingHandler(encoding);	1231 handler = xmlFindCharEncodingHandler(encoding);

1183 if (handler == NULL)	1232 if (handler == NULL)

1184 » » return(-1);	1233 » » htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);

1185 }	1234 }

1186 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);	1235 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);

1187 } else {	1236 } else {

1188 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");	1237 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");

1189 }	1238 }

1190	1239

1191 /*	1240 /*

1192 * Fallback to HTML or ASCII when the encoding is unspecified	1241 * Fallback to HTML or ASCII when the encoding is unspecified

1193 */	1242 */

1194 if (handler == NULL)	1243 if (handler == NULL)

1195 handler = xmlFindCharEncodingHandler("HTML");	1244 handler = xmlFindCharEncodingHandler("HTML");

1196 if (handler == NULL)	1245 if (handler == NULL)

1197 handler = xmlFindCharEncodingHandler("ascii");	1246 handler = xmlFindCharEncodingHandler("ascii");

1198	1247

1199 /*	1248 /*

1200 * save the content to a temp buffer.	1249 * save the content to a temp buffer.

1201 */	1250 */

1202 buf = xmlOutputBufferCreateFilename(filename, handler, 0);	1251 buf = xmlOutputBufferCreateFilename(filename, handler, 0);

1203 if (buf == NULL) return(0);	1252 if (buf == NULL) return(0);

1204	1253

1205 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);	1254 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);

1206	1255

1207 ret = xmlOutputBufferClose(buf);	1256 ret = xmlOutputBufferClose(buf);

1208 return(ret);	1257 return(ret);

1209 }	1258 }

1210	1259

1211 /**	1260 /**

1212 * htmlSaveFileEnc:	1261 * htmlSaveFileEnc:

1213 * @filename: the filename	1262 * @filename: the filename

1214 * @cur: the document	1263 * @cur: the document

1215 * @encoding: the document encoding	1264 * @encoding: the document encoding

1216 *	1265 *

1217 * Dump an HTML document to a file using a given encoding	1266 * Dump an HTML document to a file using a given encoding

1218 * and formatting returns/spaces are added.	1267 * and formatting returns/spaces are added.

1219 *	1268 *

1220 * returns: the number of bytes written or -1 in case of failure.	1269 * returns: the number of bytes written or -1 in case of failure.

1221 */	1270 */

1222 int	1271 int

1223 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {	1272 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {

1224 return(htmlSaveFileFormat(filename, cur, encoding, 1));	1273 return(htmlSaveFileFormat(filename, cur, encoding, 1));

1225 }	1274 }

1226	1275

1227 #endif /* LIBXML_OUTPUT_ENABLED */	1276 #endif /* LIBXML_OUTPUT_ENABLED */

1228	1277

1229 #define bottom_HTMLtree	1278 #define bottom_HTMLtree

1230 #include "elfgcchack.h"	1279 #include "elfgcchack.h"

1231 #endif /* LIBXML_HTML_ENABLED */	1280 #endif /* LIBXML_HTML_ENABLED */

OLD	NEW

« no previous file with comments | « third_party/libxml/src/HTMLparser.c ('k') | third_party/libxml/src/INSTALL » ('j') | no next file with comments »