| Index: third_party/libxml/src/HTMLtree.c
|
| diff --git a/third_party/libxml/src/HTMLtree.c b/third_party/libxml/src/HTMLtree.c
|
| index b5085836b900af69b90eab1d68bd71ed3ba55559..5c57fc572bc3236cc2ab538779cb58b7107ac7e9 100644
|
| --- a/third_party/libxml/src/HTMLtree.c
|
| +++ b/third_party/libxml/src/HTMLtree.c
|
| @@ -30,16 +30,18 @@
|
| #include <libxml/globals.h>
|
| #include <libxml/uri.h>
|
|
|
| +#include "buf.h"
|
| +
|
| /************************************************************************
|
| * *
|
| - * Getting/Setting encoding meta tags *
|
| + * Getting/Setting encoding meta tags *
|
| * *
|
| ************************************************************************/
|
|
|
| /**
|
| * htmlGetMetaEncoding:
|
| * @doc: the document
|
| - *
|
| + *
|
| * Encoding definition lookup in the Meta tags
|
| *
|
| * Returns the current encoding as flagged in the HTML source
|
| @@ -126,17 +128,17 @@ found_meta:
|
|
|
| found_content:
|
| encoding = xmlStrstr(content, BAD_CAST"charset=");
|
| - if (encoding == NULL)
|
| + if (encoding == NULL)
|
| encoding = xmlStrstr(content, BAD_CAST"Charset=");
|
| - if (encoding == NULL)
|
| + if (encoding == NULL)
|
| encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
|
| if (encoding != NULL) {
|
| encoding += 8;
|
| } else {
|
| encoding = xmlStrstr(content, BAD_CAST"charset =");
|
| - if (encoding == NULL)
|
| + if (encoding == NULL)
|
| encoding = xmlStrstr(content, BAD_CAST"Charset =");
|
| - if (encoding == NULL)
|
| + if (encoding == NULL)
|
| encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
|
| if (encoding != NULL)
|
| encoding += 9;
|
| @@ -151,7 +153,7 @@ found_content:
|
| * htmlSetMetaEncoding:
|
| * @doc: the document
|
| * @encoding: the encoding string
|
| - *
|
| + *
|
| * Sets the current encoding in the Meta tags
|
| * NOTE: this will not change the document content encoding, just
|
| * the META flag associated.
|
| @@ -164,6 +166,7 @@ htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
|
| const xmlChar *content = NULL;
|
| char newcontent[100];
|
|
|
| + newcontent[0] = 0;
|
|
|
| if (doc == NULL)
|
| return(-1);
|
| @@ -244,7 +247,7 @@ found_meta:
|
| http = 1;
|
| else
|
| {
|
| - if ((value != NULL) &&
|
| + if ((value != NULL) &&
|
| (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
|
| content = value;
|
| }
|
| @@ -278,8 +281,13 @@ create:
|
| xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
|
| }
|
| } else {
|
| + /* remove the meta tag if NULL is passed */
|
| + if (encoding == NULL) {
|
| + xmlUnlinkNode(meta);
|
| + xmlFreeNode(meta);
|
| + }
|
| /* change the document only if there is a real encoding change */
|
| - if (xmlStrcasestr(content, encoding) == NULL) {
|
| + else if (xmlStrcasestr(content, encoding) == NULL) {
|
| xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
|
| }
|
| }
|
| @@ -308,7 +316,7 @@ static const char* htmlBooleanAttrs[] = {
|
| * @name: the name of the attribute to check
|
| *
|
| * Determine if a given attribute is a boolean attribute.
|
| - *
|
| + *
|
| * returns: false if the attribute is not boolean, true otherwise.
|
| */
|
| int
|
| @@ -332,7 +340,7 @@ xmlOutputBufferPtr
|
| xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
|
| /************************************************************************
|
| * *
|
| - * Output error handlers *
|
| + * Output error handlers *
|
| * *
|
| ************************************************************************/
|
| /**
|
| @@ -381,17 +389,13 @@ htmlSaveErr(int code, xmlNodePtr node, const char *extra)
|
|
|
| /************************************************************************
|
| * *
|
| - * Dumping HTML tree content to a simple buffer *
|
| + * Dumping HTML tree content to a simple buffer *
|
| * *
|
| ************************************************************************/
|
|
|
| -static int
|
| -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
| - int format);
|
| -
|
| /**
|
| - * htmlNodeDumpFormat:
|
| - * @buf: the HTML buffer output
|
| + * htmlBufNodeDumpFormat:
|
| + * @buf: the xmlBufPtr output
|
| * @doc: the document
|
| * @cur: the current node
|
| * @format: should formatting spaces been added
|
| @@ -400,10 +404,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
| *
|
| * Returns the number of byte written or -1 in case of error
|
| */
|
| -static int
|
| -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
| +static size_t
|
| +htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
| int format) {
|
| - unsigned int use;
|
| + size_t use;
|
| int ret;
|
| xmlOutputBufferPtr outbuf;
|
|
|
| @@ -426,10 +430,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
| outbuf->context = NULL;
|
| outbuf->written = 0;
|
|
|
| - use = buf->use;
|
| + use = xmlBufUse(buf);
|
| htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
|
| xmlFree(outbuf);
|
| - ret = buf->use - use;
|
| + ret = xmlBufUse(buf) - use;
|
| return (ret);
|
| }
|
|
|
| @@ -446,9 +450,24 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
| */
|
| int
|
| htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
|
| + xmlBufPtr buffer;
|
| + size_t ret;
|
| +
|
| + if ((buf == NULL) || (cur == NULL)) /* no output buffer or no node: fail before touching anything */
|
| + return(-1);
|
| +
|
| xmlInitParser();
|
| + buffer = xmlBufFromBuffer(buf); /* obtain an xmlBufPtr for buf; NOTE(review): exact sharing semantics live in buf.h - confirm */
|
| + if (buffer == NULL)
|
| + return(-1);
|
| +
|
| + ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); /* format argument is fixed to 1 */
|
|
|
| - return(htmlNodeDumpFormat(buf, doc, cur, 1));
|
| + xmlBufBackToBuffer(buffer); /* NOTE(review): return value is ignored; presumably hands the data back to buf - confirm in buf.h */
|
| +
|
| + if (ret > INT_MAX) /* result must fit the int return type; also catches the helper's documented -1, which is huge as a size_t */
|
| + return(-1);
|
| + return((int) ret);
|
| }
|
|
|
| /**
|
| @@ -481,7 +500,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
|
| if (enc != XML_CHAR_ENCODING_UTF8) {
|
| handler = xmlFindCharEncodingHandler(encoding);
|
| if (handler == NULL)
|
| - return(-1);
|
| + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
| }
|
| }
|
|
|
| @@ -493,7 +512,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
|
| if (handler == NULL)
|
| handler = xmlFindCharEncodingHandler("ascii");
|
|
|
| - /*
|
| + /*
|
| * save the content to a temp buffer.
|
| */
|
| buf = xmlOutputBufferCreateFile(out, handler);
|
| @@ -562,11 +581,9 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
|
| }
|
|
|
| handler = xmlFindCharEncodingHandler(encoding);
|
| - if (handler == NULL) {
|
| - *mem = NULL;
|
| - *size = 0;
|
| - return;
|
| - }
|
| + if (handler == NULL)
|
| + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
| +
|
| } else {
|
| handler = xmlFindCharEncodingHandler(encoding);
|
| }
|
| @@ -587,15 +604,15 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
|
| return;
|
| }
|
|
|
| - htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
|
| + htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
|
|
|
| xmlOutputBufferFlush(buf);
|
| if (buf->conv != NULL) {
|
| - *size = buf->conv->use;
|
| - *mem = xmlStrndup(buf->conv->content, *size);
|
| + *size = xmlBufUse(buf->conv);
|
| + *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
|
| } else {
|
| - *size = buf->buffer->use;
|
| - *mem = xmlStrndup(buf->buffer->content, *size);
|
| + *size = xmlBufUse(buf->buffer);
|
| + *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
|
| }
|
| (void)xmlOutputBufferClose(buf);
|
| }
|
| @@ -617,7 +634,7 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
|
|
|
| /************************************************************************
|
| * *
|
| - * Dumping HTML tree content to an I/O output buffer *
|
| + * Dumping HTML tree content to an I/O output buffer *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -628,7 +645,7 @@ void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
|
| * @buf: the HTML buffer output
|
| * @doc: the document
|
| * @encoding: the encoding string
|
| - *
|
| + *
|
| * TODO: check whether encoding is needed
|
| *
|
| * Dump the HTML document DTD, if any.
|
| @@ -646,14 +663,14 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
| xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
| if (cur->ExternalID != NULL) {
|
| xmlOutputBufferWriteString(buf, " PUBLIC ");
|
| - xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
|
| + xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
|
| if (cur->SystemID != NULL) {
|
| xmlOutputBufferWriteString(buf, " ");
|
| - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
|
| - }
|
| + xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
|
| + }
|
| } else if (cur->SystemID != NULL) {
|
| xmlOutputBufferWriteString(buf, " SYSTEM ");
|
| - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
|
| + xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
|
| }
|
| xmlOutputBufferWriteString(buf, ">\n");
|
| }
|
| @@ -673,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
|
| xmlChar *value;
|
|
|
| /*
|
| - * TODO: The html output method should not escape a & character
|
| - * occurring in an attribute value immediately followed by
|
| - * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
|
| + * The html output method should not escape a & character
|
| + * occurring in an attribute value immediately followed by
|
| + * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
|
| + * This is implemented in xmlEncodeEntitiesReentrant
|
| */
|
|
|
| if (cur == NULL) {
|
| @@ -698,20 +716,51 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
|
| (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
|
| ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
|
| (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
|
| - xmlChar *escaped;
|
| xmlChar *tmp = value;
|
| + /* xmlURIEscapeStr() escapes '"', so '"' is safe to use as the attribute delimiter. */
|
| + xmlBufCCat(buf->buffer, "\"");
|
|
|
| while (IS_BLANK_CH(*tmp)) tmp++;
|
|
|
| - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
|
| - if (escaped != NULL) {
|
| - xmlBufferWriteQuotedString(buf->buffer, escaped);
|
| - xmlFree(escaped);
|
| - } else {
|
| - xmlBufferWriteQuotedString(buf->buffer, value);
|
| + /* URI Escape everything, except server side includes. */
|
| + for ( ; ; ) { /* each pass writes one escaped run plus, if present, one raw include */
|
| + xmlChar *escaped;
|
| + xmlChar endChar;
|
| + xmlChar *end = NULL;
|
| + xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
|
| + if (start != NULL) {
|
| + end = (xmlChar *)xmlStrstr(start, BAD_CAST "-->"); /* search from the opener: a "-->" located before it must not count */
|
| + if (end != NULL) {
|
| + *start = '\0'; /* temporarily end the string at the opener */
|
| + }
|
| + }
|
| +
|
| + /* Escape the whole string, or until start (set to '\0'). */
|
| + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
|
| + if (escaped != NULL) {
|
| + xmlBufCat(buf->buffer, escaped);
|
| + xmlFree(escaped);
|
| + } else {
|
| + xmlBufCat(buf->buffer, tmp); /* escaping returned NULL: fall back to the unescaped text */
|
| + }
|
| +
|
| + if (end == NULL) { /* Everything has been written. */
|
| + break;
|
| + }
|
| +
|
| + /* Do not escape anything within server side includes. */
|
| + *start = '<'; /* Restore the first character of "<!--". */
|
| + end += 3; /* strlen("-->") */
|
| + endChar = *end;
|
| + *end = '\0'; /* temporarily end the string right after "-->" */
|
| + xmlBufCat(buf->buffer, start);
|
| + *end = endChar;
|
| + tmp = end; /* resume with the text that follows the include */
|
| }
|
| +
|
| + xmlBufCCat(buf->buffer, "\"");
|
| } else {
|
| - xmlBufferWriteQuotedString(buf->buffer, value);
|
| + xmlBufWriteQuotedString(buf->buffer, value);
|
| }
|
| xmlFree(value);
|
| } else {
|
| @@ -1061,7 +1110,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) {
|
|
|
| handler = xmlFindCharEncodingHandler(encoding);
|
| if (handler == NULL)
|
| - return(-1);
|
| + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
| } else {
|
| handler = xmlFindCharEncodingHandler(encoding);
|
| }
|
| @@ -1101,7 +1150,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
|
|
|
| if ((cur == NULL) || (filename == NULL))
|
| return(-1);
|
| -
|
| +
|
| xmlInitParser();
|
|
|
| encoding = (const char *) htmlGetMetaEncoding(cur);
|
| @@ -1120,7 +1169,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
|
|
|
| handler = xmlFindCharEncodingHandler(encoding);
|
| if (handler == NULL)
|
| - return(-1);
|
| + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
| }
|
| }
|
|
|
| @@ -1132,7 +1181,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
|
| if (handler == NULL)
|
| handler = xmlFindCharEncodingHandler("ascii");
|
|
|
| - /*
|
| + /*
|
| * save the content to a temp buffer.
|
| */
|
| buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
|
| @@ -1152,7 +1201,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
|
| * @encoding: the document encoding
|
| *
|
| * Dump an HTML document to a file using a given encoding.
|
| - *
|
| + *
|
| * returns: the number of byte written or -1 in case of failure.
|
| */
|
| int
|
| @@ -1181,7 +1230,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
|
|
|
| handler = xmlFindCharEncodingHandler(encoding);
|
| if (handler == NULL)
|
| - return(-1);
|
| + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
| }
|
| htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
|
| } else {
|
| @@ -1196,7 +1245,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
|
| if (handler == NULL)
|
| handler = xmlFindCharEncodingHandler("ascii");
|
|
|
| - /*
|
| + /*
|
| * save the content to a temp buffer.
|
| */
|
| buf = xmlOutputBufferCreateFilename(filename, handler, 0);
|
| @@ -1216,7 +1265,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
|
| *
|
| * Dump an HTML document to a file using a given encoding
|
| * and formatting returns/spaces are added.
|
| - *
|
| + *
|
| * returns: the number of byte written or -1 in case of failure.
|
| */
|
| int
|
|
|