| Index: third_party/libxml/parserInternals.c
|
| diff --git a/third_party/libxml/parserInternals.c b/third_party/libxml/parserInternals.c
|
| index d7aa4cfc3c7b74771d75449da0716ea2de8296e1..2404ddfcb95c64c92f19e8d626f01078d6b8c1b4 100644
|
| --- a/third_party/libxml/parserInternals.c
|
| +++ b/third_party/libxml/parserInternals.c
|
| @@ -494,20 +494,26 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
|
| if (c & 0x80) {
|
| if (c == 0xC0)
|
| goto encoding_error;
|
| - if (cur[1] == 0)
|
| + if (cur[1] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| + cur = ctxt->input->cur;
|
| + }
|
| if ((cur[1] & 0xc0) != 0x80)
|
| goto encoding_error;
|
| if ((c & 0xe0) == 0xe0) {
|
| unsigned int val;
|
|
|
| - if (cur[2] == 0)
|
| + if (cur[2] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| + cur = ctxt->input->cur;
|
| + }
|
| if ((cur[2] & 0xc0) != 0x80)
|
| goto encoding_error;
|
| if ((c & 0xf0) == 0xf0) {
|
| - if (cur[3] == 0)
|
| + if (cur[3] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| + cur = ctxt->input->cur;
|
| + }
|
| if (((c & 0xf8) != 0xf0) ||
|
| ((cur[3] & 0xc0) != 0x80))
|
| goto encoding_error;
|
| @@ -640,18 +646,24 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
|
| if (c & 0x80) {
|
| if (((c & 0x40) == 0) || (c == 0xC0))
|
| goto encoding_error;
|
| - if (cur[1] == 0)
|
| + if (cur[1] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| + cur = ctxt->input->cur;
|
| + }
|
| if ((cur[1] & 0xc0) != 0x80)
|
| goto encoding_error;
|
| if ((c & 0xe0) == 0xe0) {
|
| - if (cur[2] == 0)
|
| + if (cur[2] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| + cur = ctxt->input->cur;
|
| + }
|
| if ((cur[2] & 0xc0) != 0x80)
|
| goto encoding_error;
|
| if ((c & 0xf0) == 0xf0) {
|
| - if (cur[3] == 0)
|
| + if (cur[3] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| + cur = ctxt->input->cur;
|
| + }
|
| if (((c & 0xf8) != 0xf0) ||
|
| ((cur[3] & 0xc0) != 0x80))
|
| goto encoding_error;
|
| @@ -933,6 +945,17 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
|
| * *
|
| ************************************************************************/
|
|
|
| +/* defined in encoding.c, not public */
|
| +int
|
| +xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
|
| + xmlBufferPtr in, int len);
|
| +
|
| +static int
|
| +xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
|
| + xmlCharEncodingHandlerPtr handler, int len);
|
| +static int
|
| +xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
| + xmlCharEncodingHandlerPtr handler, int len);
|
| /**
|
| * xmlSwitchEncoding:
|
| * @ctxt: the parser context
|
| @@ -947,6 +970,7 @@ int
|
| xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
| {
|
| xmlCharEncodingHandlerPtr handler;
|
| + int len = -1;
|
|
|
| if (ctxt == NULL) return(-1);
|
| switch (enc) {
|
| @@ -990,9 +1014,33 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
| (ctxt->input->cur[2] == 0xBF)) {
|
| ctxt->input->cur += 3;
|
| }
|
| - break ;
|
| - default:
|
| - break;
|
| + len = 90;
|
| + break;
|
| + case XML_CHAR_ENCODING_UCS2:
|
| + len = 90;
|
| + break;
|
| + case XML_CHAR_ENCODING_UCS4BE:
|
| + case XML_CHAR_ENCODING_UCS4LE:
|
| + case XML_CHAR_ENCODING_UCS4_2143:
|
| + case XML_CHAR_ENCODING_UCS4_3412:
|
| + len = 180;
|
| + break;
|
| + case XML_CHAR_ENCODING_EBCDIC:
|
| + case XML_CHAR_ENCODING_8859_1:
|
| + case XML_CHAR_ENCODING_8859_2:
|
| + case XML_CHAR_ENCODING_8859_3:
|
| + case XML_CHAR_ENCODING_8859_4:
|
| + case XML_CHAR_ENCODING_8859_5:
|
| + case XML_CHAR_ENCODING_8859_6:
|
| + case XML_CHAR_ENCODING_8859_7:
|
| + case XML_CHAR_ENCODING_8859_8:
|
| + case XML_CHAR_ENCODING_8859_9:
|
| + case XML_CHAR_ENCODING_ASCII:
|
| + case XML_CHAR_ENCODING_2022_JP:
|
| + case XML_CHAR_ENCODING_SHIFT_JIS:
|
| + case XML_CHAR_ENCODING_EUC_JP:
|
| + len = 45;
|
| + break;
|
| }
|
| handler = xmlGetCharEncodingHandler(enc);
|
| if (handler == NULL) {
|
| @@ -1083,7 +1131,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
| if (handler == NULL)
|
| return(-1);
|
| ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
| - return(xmlSwitchToEncoding(ctxt, handler));
|
| + return(xmlSwitchToEncodingInt(ctxt, handler, len));
|
| }
|
|
|
| /**
|
| @@ -1091,15 +1139,16 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
| * @ctxt: the parser context
|
| * @input: the input stream
|
| * @handler: the encoding handler
|
| + * @len: the number of bytes to convert for the first line or -1
|
| *
|
| * change the input functions when discovering the character encoding
|
| * of a given entity.
|
| *
|
| * Returns 0 in case of success, -1 otherwise
|
| */
|
| -int
|
| -xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
| - xmlCharEncodingHandlerPtr handler)
|
| +static int
|
| +xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
| + xmlCharEncodingHandlerPtr handler, int len)
|
| {
|
| int nbchars;
|
|
|
| @@ -1196,9 +1245,10 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
| * parsed with the autodetected encoding
|
| * into the parser reading buffer.
|
| */
|
| - nbchars = xmlCharEncFirstLine(input->buf->encoder,
|
| - input->buf->buffer,
|
| - input->buf->raw);
|
| + nbchars = xmlCharEncFirstLineInt(input->buf->encoder,
|
| + input->buf->buffer,
|
| + input->buf->raw,
|
| + len);
|
| }
|
| if (nbchars < 0) {
|
| xmlErrInternal(ctxt,
|
| @@ -1224,8 +1274,9 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
| }
|
|
|
| /**
|
| - * xmlSwitchToEncoding:
|
| + * xmlSwitchInputEncoding:
|
| * @ctxt: the parser context
|
| + * @input: the input stream
|
| * @handler: the encoding handler
|
| *
|
| * change the input functions when discovering the character encoding
|
| @@ -1234,13 +1285,32 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
| * Returns 0 in case of success, -1 otherwise
|
| */
|
| int
|
| -xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
| -{
|
| +xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
| + xmlCharEncodingHandlerPtr handler) {
|
| + return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
|
| +}
|
| +
|
| +/**
|
| + * xmlSwitchToEncodingInt:
|
| + * @ctxt: the parser context
|
| + * @handler: the encoding handler
|
| + * @len: the length to convert or -1
|
| + *
|
| + * change the input functions when discovering the character encoding
|
| + * of a given entity, and convert only @len bytes of the output, this
|
| + * is needed on auto detect to allow any declared encoding later to
|
| + * convert the actual content after the xmlDecl
|
| + *
|
| + * Returns 0 in case of success, -1 otherwise
|
| + */
|
| +static int
|
| +xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
|
| + xmlCharEncodingHandlerPtr handler, int len) {
|
| int ret = 0;
|
|
|
| if (handler != NULL) {
|
| if (ctxt->input != NULL) {
|
| - ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
|
| + ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
|
| } else {
|
| xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
|
| NULL);
|
| @@ -1250,11 +1320,27 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
| * The parsing is now done in UTF8 natively
|
| */
|
| ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
| - } else
|
| + } else
|
| return(-1);
|
| return(ret);
|
| }
|
|
|
| +/**
|
| + * xmlSwitchToEncoding:
|
| + * @ctxt: the parser context
|
| + * @handler: the encoding handler
|
| + *
|
| + * Change the input functions when discovering the character encoding
|
| + * of a given entity. Delegates to xmlSwitchToEncodingInt() with @len = -1.
|
| + *
|
| + * Returns 0 in case of success, -1 otherwise
|
| + */
|
| +int
|
| +xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
| +{
|
| + return (xmlSwitchToEncodingInt(ctxt, handler, -1));
|
| +}
|
| +
|
| /************************************************************************
|
| * *
|
| * Commodity functions to handle entities processing *
|
| @@ -1399,7 +1485,8 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
|
| if (input == NULL) {
|
| return(NULL);
|
| }
|
| - input->filename = (char *) entity->URI;
|
| + if (entity->URI != NULL)
|
| + input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
|
| input->base = entity->content;
|
| input->cur = entity->content;
|
| input->length = entity->length;
|
| @@ -1669,6 +1756,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
|
| ctxt->depth = 0;
|
| ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
| ctxt->catalogs = NULL;
|
| + ctxt->nbentities = 0;
|
| xmlInitNodeInfoSeq(&ctxt->node_seq);
|
| return(0);
|
| }
|
| @@ -1694,6 +1782,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
|
| if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
|
| if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
|
| if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
|
| + if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
|
| if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
|
| if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
|
| if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
|
| @@ -2065,7 +2154,7 @@ xmlKeepBlanksDefault(int val) {
|
| int old = xmlKeepBlanksDefaultValue;
|
|
|
| xmlKeepBlanksDefaultValue = val;
|
| - xmlIndentTreeOutput = !val;
|
| + if (!val) xmlIndentTreeOutput = 1;
|
| return(old);
|
| }
|
|
|
|
|