| Index: third_party/libxml/src/parserInternals.c
|
| diff --git a/third_party/libxml/src/parserInternals.c b/third_party/libxml/src/parserInternals.c
|
| index 2b8646c2187addef1de6691532cdd3b210145a27..bfc778ac1f778c5dab5b5fcd91dbe994e8f8d58c 100644
|
| --- a/third_party/libxml/src/parserInternals.c
|
| +++ b/third_party/libxml/src/parserInternals.c
|
| @@ -55,6 +55,10 @@
|
| #include <libxml/globals.h>
|
| #include <libxml/chvalid.h>
|
|
|
| +#define CUR(ctxt) ((ctxt)->input->cur) /* the input's cur pointer */
|
| +#define END(ctxt) ((ctxt)->input->end) /* the input's end pointer */
|
| +#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) /* nonzero while cur has not passed end */
|
| +
|
| #include "buf.h"
|
| #include "enc.h"
|
|
|
| @@ -165,7 +169,7 @@ __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
|
| *
|
| * Handle an internal error
|
| */
|
| -static void
|
| +static void LIBXML_ATTR_FORMAT(2,0)
|
| xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
|
| {
|
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
|
| @@ -193,7 +197,7 @@ xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
|
| *
|
| * n encoding error
|
| */
|
| -static void
|
| +static void LIBXML_ATTR_FORMAT(3,0)
|
| xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| const char *msg, int val)
|
| {
|
| @@ -294,7 +298,7 @@ xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUS
|
| */
|
| int
|
| xmlParserInputGrow(xmlParserInputPtr in, int len) {
|
| - size_t ret;
|
| + int ret;
|
| size_t indx;
|
| const xmlChar *content;
|
|
|
| @@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
|
| (ctxt->input == NULL))
|
| return;
|
|
|
| - if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
|
| - if ((*ctxt->input->cur == 0) &&
|
| - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
|
| - (ctxt->instate != XML_PARSER_COMMENT)) {
|
| - /*
|
| - * If we are at the end of the current entity and
|
| - * the context allows it, we pop consumed entities
|
| - * automatically.
|
| - * the auto closing should be blocked in other cases
|
| - */
|
| + if (!(VALID_CTXT(ctxt))) {
|
| + xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
|
| + ctxt->errNo = XML_ERR_INTERNAL_ERROR;
|
| + xmlStopParser(ctxt);
|
| + return;
|
| + }
|
| +
|
| + if ((*ctxt->input->cur == 0) &&
|
| + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
|
| + if ((ctxt->instate != XML_PARSER_COMMENT))
|
| xmlPopInput(ctxt);
|
| - } else {
|
| - const unsigned char *cur;
|
| - unsigned char c;
|
| + return;
|
| + }
|
|
|
| - /*
|
| - * 2.11 End-of-Line Handling
|
| - * the literal two-character sequence "#xD#xA" or a standalone
|
| - * literal #xD, an XML processor must pass to the application
|
| - * the single character #xA.
|
| - */
|
| - if (*(ctxt->input->cur) == '\n') {
|
| - ctxt->input->line++; ctxt->input->col = 1;
|
| - } else
|
| - ctxt->input->col++;
|
| + if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
|
| + const unsigned char *cur;
|
| + unsigned char c;
|
|
|
| - /*
|
| - * We are supposed to handle UTF8, check it's valid
|
| - * From rfc2044: encoding of the Unicode values on UTF-8:
|
| - *
|
| - * UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
| - * 0000 0000-0000 007F 0xxxxxxx
|
| - * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
| - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
| - *
|
| - * Check for the 0x110000 limit too
|
| - */
|
| - cur = ctxt->input->cur;
|
| + /*
|
| + * 2.11 End-of-Line Handling
|
| + * the literal two-character sequence "#xD#xA" or a standalone
|
| + * literal #xD, an XML processor must pass to the application
|
| + * the single character #xA.
|
| + */
|
| + if (*(ctxt->input->cur) == '\n') {
|
| + ctxt->input->line++; ctxt->input->col = 1;
|
| + } else
|
| + ctxt->input->col++;
|
|
|
| - c = *cur;
|
| - if (c & 0x80) {
|
| - if (c == 0xC0)
|
| - goto encoding_error;
|
| - if (cur[1] == 0) {
|
| + /*
|
| + * We are supposed to handle UTF8, check it's valid
|
| + * From rfc2044: encoding of the Unicode values on UTF-8:
|
| + *
|
| + * UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
| + * 0000 0000-0000 007F 0xxxxxxx
|
| + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
| + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
| + *
|
| + * Check for the 0x110000 limit too
|
| + */
|
| + cur = ctxt->input->cur;
|
| +
|
| + c = *cur;
|
| + if (c & 0x80) {
|
| + if (c == 0xC0)
|
| + goto encoding_error;
|
| + if (cur[1] == 0) {
|
| + xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| + cur = ctxt->input->cur;
|
| + }
|
| + if ((cur[1] & 0xc0) != 0x80)
|
| + goto encoding_error;
|
| + if ((c & 0xe0) == 0xe0) {
|
| + unsigned int val;
|
| +
|
| + if (cur[2] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| cur = ctxt->input->cur;
|
| }
|
| - if ((cur[1] & 0xc0) != 0x80)
|
| + if ((cur[2] & 0xc0) != 0x80)
|
| goto encoding_error;
|
| - if ((c & 0xe0) == 0xe0) {
|
| - unsigned int val;
|
| -
|
| - if (cur[2] == 0) {
|
| + if ((c & 0xf0) == 0xf0) {
|
| + if (cur[3] == 0) {
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| cur = ctxt->input->cur;
|
| }
|
| - if ((cur[2] & 0xc0) != 0x80)
|
| + if (((c & 0xf8) != 0xf0) ||
|
| + ((cur[3] & 0xc0) != 0x80))
|
| goto encoding_error;
|
| - if ((c & 0xf0) == 0xf0) {
|
| - if (cur[3] == 0) {
|
| - xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| - cur = ctxt->input->cur;
|
| - }
|
| - if (((c & 0xf8) != 0xf0) ||
|
| - ((cur[3] & 0xc0) != 0x80))
|
| - goto encoding_error;
|
| - /* 4-byte code */
|
| - ctxt->input->cur += 4;
|
| - val = (cur[0] & 0x7) << 18;
|
| - val |= (cur[1] & 0x3f) << 12;
|
| - val |= (cur[2] & 0x3f) << 6;
|
| - val |= cur[3] & 0x3f;
|
| - } else {
|
| - /* 3-byte code */
|
| - ctxt->input->cur += 3;
|
| - val = (cur[0] & 0xf) << 12;
|
| - val |= (cur[1] & 0x3f) << 6;
|
| - val |= cur[2] & 0x3f;
|
| - }
|
| - if (((val > 0xd7ff) && (val < 0xe000)) ||
|
| - ((val > 0xfffd) && (val < 0x10000)) ||
|
| - (val >= 0x110000)) {
|
| - xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
|
| - "Char 0x%X out of allowed range\n",
|
| - val);
|
| - }
|
| - } else
|
| - /* 2-byte code */
|
| - ctxt->input->cur += 2;
|
| + /* 4-byte code */
|
| + ctxt->input->cur += 4;
|
| + val = (cur[0] & 0x7) << 18;
|
| + val |= (cur[1] & 0x3f) << 12;
|
| + val |= (cur[2] & 0x3f) << 6;
|
| + val |= cur[3] & 0x3f;
|
| + } else {
|
| + /* 3-byte code */
|
| + ctxt->input->cur += 3;
|
| + val = (cur[0] & 0xf) << 12;
|
| + val |= (cur[1] & 0x3f) << 6;
|
| + val |= cur[2] & 0x3f;
|
| + }
|
| + if (((val > 0xd7ff) && (val < 0xe000)) ||
|
| + ((val > 0xfffd) && (val < 0x10000)) ||
|
| + (val >= 0x110000)) {
|
| + xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
|
| + "Char 0x%X out of allowed range\n",
|
| + val);
|
| + }
|
| } else
|
| - /* 1-byte code */
|
| - ctxt->input->cur++;
|
| + /* 2-byte code */
|
| + ctxt->input->cur += 2;
|
| + } else
|
| + /* 1-byte code */
|
| + ctxt->input->cur++;
|
|
|
| - ctxt->nbChars++;
|
| - if (*ctxt->input->cur == 0)
|
| - xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| - }
|
| + ctxt->nbChars++;
|
| + if (*ctxt->input->cur == 0)
|
| + xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| } else {
|
| /*
|
| * Assume it's a fixed length encoding (1) with
|
|
|