Index: third_party/libxml/parserInternals.c |
diff --git a/third_party/libxml/parserInternals.c b/third_party/libxml/parserInternals.c |
index d7aa4cfc3c7b74771d75449da0716ea2de8296e1..2404ddfcb95c64c92f19e8d626f01078d6b8c1b4 100644 |
--- a/third_party/libxml/parserInternals.c |
+++ b/third_party/libxml/parserInternals.c |
@@ -494,20 +494,26 @@ xmlNextChar(xmlParserCtxtPtr ctxt) |
if (c & 0x80) { |
if (c == 0xC0) |
goto encoding_error; |
- if (cur[1] == 0) |
+ if (cur[1] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
+ cur = ctxt->input->cur; |
+ } |
if ((cur[1] & 0xc0) != 0x80) |
goto encoding_error; |
if ((c & 0xe0) == 0xe0) { |
unsigned int val; |
- if (cur[2] == 0) |
+ if (cur[2] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
+ cur = ctxt->input->cur; |
+ } |
if ((cur[2] & 0xc0) != 0x80) |
goto encoding_error; |
if ((c & 0xf0) == 0xf0) { |
- if (cur[3] == 0) |
+ if (cur[3] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
+ cur = ctxt->input->cur; |
+ } |
if (((c & 0xf8) != 0xf0) || |
((cur[3] & 0xc0) != 0x80)) |
goto encoding_error; |
@@ -640,18 +646,24 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { |
if (c & 0x80) { |
if (((c & 0x40) == 0) || (c == 0xC0)) |
goto encoding_error; |
- if (cur[1] == 0) |
+ if (cur[1] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
+ cur = ctxt->input->cur; |
+ } |
if ((cur[1] & 0xc0) != 0x80) |
goto encoding_error; |
if ((c & 0xe0) == 0xe0) { |
- if (cur[2] == 0) |
+ if (cur[2] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
+ cur = ctxt->input->cur; |
+ } |
if ((cur[2] & 0xc0) != 0x80) |
goto encoding_error; |
if ((c & 0xf0) == 0xf0) { |
- if (cur[3] == 0) |
+ if (cur[3] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
+ cur = ctxt->input->cur; |
+ } |
if (((c & 0xf8) != 0xf0) || |
((cur[3] & 0xc0) != 0x80)) |
goto encoding_error; |
@@ -933,6 +945,17 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { |
* * |
************************************************************************/ |
+/* defined in encoding.c, not public */ |
+int |
+xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, |
+ xmlBufferPtr in, int len); |
+ |
+static int |
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, |
+ xmlCharEncodingHandlerPtr handler, int len); |
+static int |
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, |
+ xmlCharEncodingHandlerPtr handler, int len); |
/** |
* xmlSwitchEncoding: |
* @ctxt: the parser context |
@@ -947,6 +970,7 @@ int |
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) |
{ |
xmlCharEncodingHandlerPtr handler; |
+ int len = -1; |
if (ctxt == NULL) return(-1); |
switch (enc) { |
@@ -990,9 +1014,33 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) |
(ctxt->input->cur[2] == 0xBF)) { |
ctxt->input->cur += 3; |
} |
- break ; |
- default: |
- break; |
+ len = 90; |
+ break; |
+ case XML_CHAR_ENCODING_UCS2: |
+ len = 90; |
+ break; |
+ case XML_CHAR_ENCODING_UCS4BE: |
+ case XML_CHAR_ENCODING_UCS4LE: |
+ case XML_CHAR_ENCODING_UCS4_2143: |
+ case XML_CHAR_ENCODING_UCS4_3412: |
+ len = 180; |
+ break; |
+ case XML_CHAR_ENCODING_EBCDIC: |
+ case XML_CHAR_ENCODING_8859_1: |
+ case XML_CHAR_ENCODING_8859_2: |
+ case XML_CHAR_ENCODING_8859_3: |
+ case XML_CHAR_ENCODING_8859_4: |
+ case XML_CHAR_ENCODING_8859_5: |
+ case XML_CHAR_ENCODING_8859_6: |
+ case XML_CHAR_ENCODING_8859_7: |
+ case XML_CHAR_ENCODING_8859_8: |
+ case XML_CHAR_ENCODING_8859_9: |
+ case XML_CHAR_ENCODING_ASCII: |
+ case XML_CHAR_ENCODING_2022_JP: |
+ case XML_CHAR_ENCODING_SHIFT_JIS: |
+ case XML_CHAR_ENCODING_EUC_JP: |
+ len = 45; |
+ break; |
} |
handler = xmlGetCharEncodingHandler(enc); |
if (handler == NULL) { |
@@ -1083,7 +1131,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) |
if (handler == NULL) |
return(-1); |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
- return(xmlSwitchToEncoding(ctxt, handler)); |
+ return(xmlSwitchToEncodingInt(ctxt, handler, len)); |
} |
/** |
@@ -1091,15 +1139,16 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) |
* @ctxt: the parser context |
* @input: the input stream |
* @handler: the encoding handler |
+ * @len: the number of bytes to convert for the first line or -1 |
* |
* change the input functions when discovering the character encoding |
* of a given entity. |
* |
* Returns 0 in case of success, -1 otherwise |
*/ |
-int |
-xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, |
- xmlCharEncodingHandlerPtr handler) |
+static int |
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, |
+ xmlCharEncodingHandlerPtr handler, int len) |
{ |
int nbchars; |
@@ -1196,9 +1245,10 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, |
* parsed with the autodetected encoding |
* into the parser reading buffer. |
*/ |
- nbchars = xmlCharEncFirstLine(input->buf->encoder, |
- input->buf->buffer, |
- input->buf->raw); |
+ nbchars = xmlCharEncFirstLineInt(input->buf->encoder, |
+ input->buf->buffer, |
+ input->buf->raw, |
+ len); |
} |
if (nbchars < 0) { |
xmlErrInternal(ctxt, |
@@ -1224,8 +1274,9 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, |
} |
/** |
- * xmlSwitchToEncoding: |
+ * xmlSwitchInputEncoding: |
* @ctxt: the parser context |
+ * @input: the input stream |
* @handler: the encoding handler |
* |
* change the input functions when discovering the character encoding |
@@ -1234,13 +1285,32 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, |
* Returns 0 in case of success, -1 otherwise |
*/ |
int |
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) |
-{ |
+xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, |
+ xmlCharEncodingHandlerPtr handler) { |
+ return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); |
+} |
+ |
+/** |
+ * xmlSwitchToEncodingInt: |
+ * @ctxt: the parser context |
+ * @handler: the encoding handler |
+ * @len: the lenght to convert or -1 |
+ * |
+ * change the input functions when discovering the character encoding |
+ * of a given entity, and convert only @len bytes of the output, this |
+ * is needed on auto detect to allows any declared encoding later to |
+ * convert the actual content after the xmlDecl |
+ * |
+ * Returns 0 in case of success, -1 otherwise |
+ */ |
+static int |
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, |
+ xmlCharEncodingHandlerPtr handler, int len) { |
int ret = 0; |
if (handler != NULL) { |
if (ctxt->input != NULL) { |
- ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler); |
+ ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); |
} else { |
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", |
NULL); |
@@ -1250,11 +1320,27 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) |
* The parsing is now done in UTF8 natively |
*/ |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
- } else |
+ } else |
return(-1); |
return(ret); |
} |
+/** |
+ * xmlSwitchToEncoding: |
+ * @ctxt: the parser context |
+ * @handler: the encoding handler |
+ * |
+ * change the input functions when discovering the character encoding |
+ * of a given entity. |
+ * |
+ * Returns 0 in case of success, -1 otherwise |
+ */ |
+int |
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) |
+{ |
+ return (xmlSwitchToEncodingInt(ctxt, handler, -1)); |
+} |
+ |
/************************************************************************ |
* * |
* Commodity functions to handle entities processing * |
@@ -1399,7 +1485,8 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { |
if (input == NULL) { |
return(NULL); |
} |
- input->filename = (char *) entity->URI; |
+ if (entity->URI != NULL) |
+ input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); |
input->base = entity->content; |
input->cur = entity->content; |
input->length = entity->length; |
@@ -1669,6 +1756,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) |
ctxt->depth = 0; |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
ctxt->catalogs = NULL; |
+ ctxt->nbentities = 0; |
xmlInitNodeInfoSeq(&ctxt->node_seq); |
return(0); |
} |
@@ -1694,6 +1782,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) |
if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); |
if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); |
if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); |
+ if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); |
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); |
if (ctxt->version != NULL) xmlFree((char *) ctxt->version); |
if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); |
@@ -2065,7 +2154,7 @@ xmlKeepBlanksDefault(int val) { |
int old = xmlKeepBlanksDefaultValue; |
xmlKeepBlanksDefaultValue = val; |
- xmlIndentTreeOutput = !val; |
+ if (!val) xmlIndentTreeOutput = 1; |
return(old); |
} |