Index: third_party/libxml/src/HTMLtree.c |
diff --git a/third_party/libxml/src/HTMLtree.c b/third_party/libxml/src/HTMLtree.c |
index b5085836b900af69b90eab1d68bd71ed3ba55559..5c57fc572bc3236cc2ab538779cb58b7107ac7e9 100644 |
--- a/third_party/libxml/src/HTMLtree.c |
+++ b/third_party/libxml/src/HTMLtree.c |
@@ -30,16 +30,18 @@ |
#include <libxml/globals.h> |
#include <libxml/uri.h> |
+#include "buf.h" |
+ |
/************************************************************************ |
* * |
- * Getting/Setting encoding meta tags * |
+ * Getting/Setting encoding meta tags * |
* * |
************************************************************************/ |
/** |
* htmlGetMetaEncoding: |
* @doc: the document |
- * |
+ * |
* Encoding definition lookup in the Meta tags |
* |
* Returns the current encoding as flagged in the HTML source |
@@ -126,17 +128,17 @@ found_meta: |
found_content: |
encoding = xmlStrstr(content, BAD_CAST"charset="); |
- if (encoding == NULL) |
+ if (encoding == NULL) |
encoding = xmlStrstr(content, BAD_CAST"Charset="); |
- if (encoding == NULL) |
+ if (encoding == NULL) |
encoding = xmlStrstr(content, BAD_CAST"CHARSET="); |
if (encoding != NULL) { |
encoding += 8; |
} else { |
encoding = xmlStrstr(content, BAD_CAST"charset ="); |
- if (encoding == NULL) |
+ if (encoding == NULL) |
encoding = xmlStrstr(content, BAD_CAST"Charset ="); |
- if (encoding == NULL) |
+ if (encoding == NULL) |
encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); |
if (encoding != NULL) |
encoding += 9; |
@@ -151,7 +153,7 @@ found_content: |
* htmlSetMetaEncoding: |
* @doc: the document |
* @encoding: the encoding string |
- * |
+ * |
* Sets the current encoding in the Meta tags |
* NOTE: this will not change the document content encoding, just |
* the META flag associated. |
@@ -164,6 +166,7 @@ htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { |
const xmlChar *content = NULL; |
char newcontent[100]; |
+ newcontent[0] = 0; |
if (doc == NULL) |
return(-1); |
@@ -244,7 +247,7 @@ found_meta: |
http = 1; |
else |
{ |
- if ((value != NULL) && |
+ if ((value != NULL) && |
(!xmlStrcasecmp(attr->name, BAD_CAST"content"))) |
content = value; |
} |
@@ -278,8 +281,13 @@ create: |
xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); |
} |
} else { |
+ /* remove the meta tag if NULL is passed */ |
+ if (encoding == NULL) { |
+ xmlUnlinkNode(meta); |
+ xmlFreeNode(meta); |
+ } |
/* change the document only if there is a real encoding change */ |
- if (xmlStrcasestr(content, encoding) == NULL) { |
+ else if (xmlStrcasestr(content, encoding) == NULL) { |
xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent); |
} |
} |
@@ -308,7 +316,7 @@ static const char* htmlBooleanAttrs[] = { |
* @name: the name of the attribute to check |
* |
* Determine if a given attribute is a boolean attribute. |
- * |
+ * |
* returns: false if the attribute is not boolean, true otherwise. |
*/ |
int |
@@ -332,7 +340,7 @@ xmlOutputBufferPtr |
xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); |
/************************************************************************ |
* * |
- * Output error handlers * |
+ * Output error handlers * |
* * |
************************************************************************/ |
/** |
@@ -381,17 +389,13 @@ htmlSaveErr(int code, xmlNodePtr node, const char *extra) |
/************************************************************************ |
* * |
- * Dumping HTML tree content to a simple buffer * |
+ * Dumping HTML tree content to a simple buffer * |
* * |
************************************************************************/ |
-static int |
-htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
- int format); |
- |
/** |
- * htmlNodeDumpFormat: |
- * @buf: the HTML buffer output |
+ * htmlBufNodeDumpFormat: |
+ * @buf: the xmlBufPtr output |
* @doc: the document |
* @cur: the current node |
* @format: should formatting spaces been added |
@@ -400,10 +404,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
* |
* Returns the number of byte written or -1 in case of error |
*/ |
-static int |
-htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
+static size_t |
+htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
int format) { |
- unsigned int use; |
+ size_t use; |
int ret; |
xmlOutputBufferPtr outbuf; |
@@ -426,10 +430,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
outbuf->context = NULL; |
outbuf->written = 0; |
- use = buf->use; |
+ use = xmlBufUse(buf); |
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); |
xmlFree(outbuf); |
- ret = buf->use - use; |
+ ret = xmlBufUse(buf) - use; |
return (ret); |
} |
@@ -446,9 +450,24 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
*/ |
int |
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { |
+ xmlBufPtr buffer; |
+ size_t ret; |
+ |
+ if ((buf == NULL) || (cur == NULL)) /* reject a missing output buffer or node up front */ |
+ return(-1); |
+ |
xmlInitParser(); |
+ buffer = xmlBufFromBuffer(buf); /* NOTE(review): presumably wraps buf as an xmlBuf; confirm in buf.h */ |
+ if (buffer == NULL) |
+ return(-1); |
+ |
+ ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); /* last argument is the format flag, always 1 here */ |
- return(htmlNodeDumpFormat(buf, doc, cur, 1)); |
+ xmlBufBackToBuffer(buffer); /* NOTE(review): presumably hands the result back to buf; confirm in buf.h */ |
+ |
+ if (ret > INT_MAX) /* ret is a size_t; a count that does not fit the int return type is reported as -1 */ |
+ return(-1); |
+ return((int) ret); |
} |
/** |
@@ -481,7 +500,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, |
if (enc != XML_CHAR_ENCODING_UTF8) { |
handler = xmlFindCharEncodingHandler(encoding); |
if (handler == NULL) |
- return(-1); |
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
} |
} |
@@ -493,7 +512,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, |
if (handler == NULL) |
handler = xmlFindCharEncodingHandler("ascii"); |
- /* |
+ /* |
* save the content to a temp buffer. |
*/ |
buf = xmlOutputBufferCreateFile(out, handler); |
@@ -562,11 +581,9 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { |
} |
handler = xmlFindCharEncodingHandler(encoding); |
- if (handler == NULL) { |
- *mem = NULL; |
- *size = 0; |
- return; |
- } |
+ if (handler == NULL) |
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
+ |
} else { |
handler = xmlFindCharEncodingHandler(encoding); |
} |
@@ -587,15 +604,15 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { |
return; |
} |
- htmlDocContentDumpFormatOutput(buf, cur, NULL, format); |
+ htmlDocContentDumpFormatOutput(buf, cur, NULL, format); |
xmlOutputBufferFlush(buf); |
if (buf->conv != NULL) { |
- *size = buf->conv->use; |
- *mem = xmlStrndup(buf->conv->content, *size); |
+ *size = xmlBufUse(buf->conv); |
+ *mem = xmlStrndup(xmlBufContent(buf->conv), *size); |
} else { |
- *size = buf->buffer->use; |
- *mem = xmlStrndup(buf->buffer->content, *size); |
+ *size = xmlBufUse(buf->buffer); |
+ *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); |
} |
(void)xmlOutputBufferClose(buf); |
} |
@@ -617,7 +634,7 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { |
/************************************************************************ |
* * |
- * Dumping HTML tree content to an I/O output buffer * |
+ * Dumping HTML tree content to an I/O output buffer * |
* * |
************************************************************************/ |
@@ -628,7 +645,7 @@ void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); |
* @buf: the HTML buffer output |
* @doc: the document |
* @encoding: the encoding string |
- * |
+ * |
* TODO: check whether encoding is needed |
* |
* Dump the HTML document DTD, if any. |
@@ -646,14 +663,14 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
xmlOutputBufferWriteString(buf, (const char *)cur->name); |
if (cur->ExternalID != NULL) { |
xmlOutputBufferWriteString(buf, " PUBLIC "); |
- xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); |
+ xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); |
if (cur->SystemID != NULL) { |
xmlOutputBufferWriteString(buf, " "); |
- xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); |
- } |
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID); |
+ } |
} else if (cur->SystemID != NULL) { |
xmlOutputBufferWriteString(buf, " SYSTEM "); |
- xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); |
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID); |
} |
xmlOutputBufferWriteString(buf, ">\n"); |
} |
@@ -673,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, |
xmlChar *value; |
/* |
- * TODO: The html output method should not escape a & character |
- * occurring in an attribute value immediately followed by |
- * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). |
+ * The html output method should not escape a & character |
+ * occurring in an attribute value immediately followed by |
+ * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). |
+ * This is implemented in xmlEncodeEntitiesReentrant |
*/ |
if (cur == NULL) { |
@@ -698,20 +716,51 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, |
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) || |
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && |
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { |
- xmlChar *escaped; |
xmlChar *tmp = value; |
+ /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ |
+ xmlBufCCat(buf->buffer, "\""); |
while (IS_BLANK_CH(*tmp)) tmp++; |
- escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); |
- if (escaped != NULL) { |
- xmlBufferWriteQuotedString(buf->buffer, escaped); |
- xmlFree(escaped); |
- } else { |
- xmlBufferWriteQuotedString(buf->buffer, value); |
+ /* URI-escape everything except server side includes ("<!--" ... "-->"). */ |
+ for ( ; ; ) { |
+ xmlChar *escaped; |
+ xmlChar endChar; |
+ xmlChar *end = NULL; |
+ xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--"); |
+ if (start != NULL) { |
+ end = (xmlChar *)xmlStrstr(start, BAD_CAST "-->"); /* search from start: a "-->" before "<!--" must not count */ |
+ if (end != NULL) { |
+ *start = '\0'; |
+ } |
+ } |
+ |
+ /* Escape the whole string, or only up to start (temporarily set to '\0'). */ |
+ escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); |
+ if (escaped != NULL) { |
+ xmlBufCat(buf->buffer, escaped); |
+ xmlFree(escaped); |
+ } else { |
+ xmlBufCat(buf->buffer, tmp); |
+ } |
+ |
+ if (end == NULL) { /* No complete include left: everything has been written. */ |
+ break; |
+ } |
+ |
+ /* Do not escape anything within server side includes. */ |
+ *start = '<'; /* Restore the first character of "<!--". */ |
+ end += 3; /* strlen("-->") */ |
+ endChar = *end; |
+ *end = '\0'; |
+ xmlBufCat(buf->buffer, start); |
+ *end = endChar; |
+ tmp = end; |
} |
+ |
+ xmlBufCCat(buf->buffer, "\""); |
} else { |
- xmlBufferWriteQuotedString(buf->buffer, value); |
+ xmlBufWriteQuotedString(buf->buffer, value); |
} |
xmlFree(value); |
} else { |
@@ -1061,7 +1110,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) { |
handler = xmlFindCharEncodingHandler(encoding); |
if (handler == NULL) |
- return(-1); |
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
} else { |
handler = xmlFindCharEncodingHandler(encoding); |
} |
@@ -1101,7 +1150,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { |
if ((cur == NULL) || (filename == NULL)) |
return(-1); |
- |
+ |
xmlInitParser(); |
encoding = (const char *) htmlGetMetaEncoding(cur); |
@@ -1120,7 +1169,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { |
handler = xmlFindCharEncodingHandler(encoding); |
if (handler == NULL) |
- return(-1); |
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
} |
} |
@@ -1132,7 +1181,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { |
if (handler == NULL) |
handler = xmlFindCharEncodingHandler("ascii"); |
- /* |
+ /* |
* save the content to a temp buffer. |
*/ |
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); |
@@ -1152,7 +1201,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { |
* @encoding: the document encoding |
* |
* Dump an HTML document to a file using a given encoding. |
- * |
+ * |
* returns: the number of byte written or -1 in case of failure. |
*/ |
int |
@@ -1181,7 +1230,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, |
handler = xmlFindCharEncodingHandler(encoding); |
if (handler == NULL) |
- return(-1); |
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
} |
htmlSetMetaEncoding(cur, (const xmlChar *) encoding); |
} else { |
@@ -1196,7 +1245,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, |
if (handler == NULL) |
handler = xmlFindCharEncodingHandler("ascii"); |
- /* |
+ /* |
* save the content to a temp buffer. |
*/ |
buf = xmlOutputBufferCreateFilename(filename, handler, 0); |
@@ -1216,7 +1265,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, |
* |
* Dump an HTML document to a file using a given encoding |
* and formatting returns/spaces are added. |
- * |
+ * |
* returns: the number of byte written or -1 in case of failure. |
*/ |
int |