Index: third_party/libxml/src/parserInternals.c |
diff --git a/third_party/libxml/src/parserInternals.c b/third_party/libxml/src/parserInternals.c |
index 2b8646c2187addef1de6691532cdd3b210145a27..bfc778ac1f778c5dab5b5fcd91dbe994e8f8d58c 100644 |
--- a/third_party/libxml/src/parserInternals.c |
+++ b/third_party/libxml/src/parserInternals.c |
@@ -55,6 +55,10 @@ |
#include <libxml/globals.h> |
#include <libxml/chvalid.h> |
+#define CUR(ctxt) ((ctxt)->input->cur) /* cursor pointer of ctxt->input */ |
+#define END(ctxt) ((ctxt)->input->end) /* end pointer of ctxt->input */ |
+#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) /* nonzero while cur has not passed end; evaluates ctxt twice */ |
+ |
#include "buf.h" |
#include "enc.h" |
@@ -165,7 +169,7 @@ __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, |
* |
* Handle an internal error |
*/ |
-static void |
+static void LIBXML_ATTR_FORMAT(2,0) |
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) |
{ |
if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
@@ -193,7 +197,7 @@ xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) |
* |
* n encoding error |
*/ |
-static void |
+static void LIBXML_ATTR_FORMAT(3,0) |
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
const char *msg, int val) |
{ |
@@ -294,7 +298,7 @@ xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUS |
*/ |
int |
xmlParserInputGrow(xmlParserInputPtr in, int len) { |
- size_t ret; |
+ int ret; |
size_t indx; |
const xmlChar *content; |
@@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt) |
(ctxt->input == NULL)) |
return; |
- if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
- if ((*ctxt->input->cur == 0) && |
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && |
- (ctxt->instate != XML_PARSER_COMMENT)) { |
- /* |
- * If we are at the end of the current entity and |
- * the context allows it, we pop consumed entities |
- * automatically. |
- * the auto closing should be blocked in other cases |
- */ |
+ if (!(VALID_CTXT(ctxt))) { |
+ xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); |
+ ctxt->errNo = XML_ERR_INTERNAL_ERROR; |
+ xmlStopParser(ctxt); |
+ return; |
+ } |
+ |
+ if ((*ctxt->input->cur == 0) && |
+ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { |
+ if ((ctxt->instate != XML_PARSER_COMMENT)) |
xmlPopInput(ctxt); |
- } else { |
- const unsigned char *cur; |
- unsigned char c; |
+ return; |
+ } |
- /* |
- * 2.11 End-of-Line Handling |
- * the literal two-character sequence "#xD#xA" or a standalone |
- * literal #xD, an XML processor must pass to the application |
- * the single character #xA. |
- */ |
- if (*(ctxt->input->cur) == '\n') { |
- ctxt->input->line++; ctxt->input->col = 1; |
- } else |
- ctxt->input->col++; |
+ if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
+ const unsigned char *cur; |
+ unsigned char c; |
- /* |
- * We are supposed to handle UTF8, check it's valid |
- * From rfc2044: encoding of the Unicode values on UTF-8: |
- * |
- * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
- * 0000 0000-0000 007F 0xxxxxxx |
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
- * |
- * Check for the 0x110000 limit too |
- */ |
- cur = ctxt->input->cur; |
+ /* |
+ * 2.11 End-of-Line Handling |
+ * the literal two-character sequence "#xD#xA" or a standalone |
+ * literal #xD, an XML processor must pass to the application |
+ * the single character #xA. |
+ */ |
+ if (*(ctxt->input->cur) == '\n') { |
+ ctxt->input->line++; ctxt->input->col = 1; |
+ } else |
+ ctxt->input->col++; |
- c = *cur; |
- if (c & 0x80) { |
- if (c == 0xC0) |
- goto encoding_error; |
- if (cur[1] == 0) { |
+ /* |
+ * We are supposed to handle UTF8, check it's valid |
+ * From rfc2044: encoding of the Unicode values on UTF-8: |
+ * |
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
+ * 0000 0000-0000 007F 0xxxxxxx |
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
+ * |
+ * Check for the 0x110000 limit too |
+ */ |
+ cur = ctxt->input->cur; |
+ |
+ c = *cur; |
+ if (c & 0x80) { |
+ if (c == 0xC0) |
+ goto encoding_error; |
+ if (cur[1] == 0) { |
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
+ cur = ctxt->input->cur; |
+ } |
+ if ((cur[1] & 0xc0) != 0x80) |
+ goto encoding_error; |
+ if ((c & 0xe0) == 0xe0) { |
+ unsigned int val; |
+ |
+ if (cur[2] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
cur = ctxt->input->cur; |
} |
- if ((cur[1] & 0xc0) != 0x80) |
+ if ((cur[2] & 0xc0) != 0x80) |
goto encoding_error; |
- if ((c & 0xe0) == 0xe0) { |
- unsigned int val; |
- |
- if (cur[2] == 0) { |
+ if ((c & 0xf0) == 0xf0) { |
+ if (cur[3] == 0) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
cur = ctxt->input->cur; |
} |
- if ((cur[2] & 0xc0) != 0x80) |
+ if (((c & 0xf8) != 0xf0) || |
+ ((cur[3] & 0xc0) != 0x80)) |
goto encoding_error; |
- if ((c & 0xf0) == 0xf0) { |
- if (cur[3] == 0) { |
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
- cur = ctxt->input->cur; |
- } |
- if (((c & 0xf8) != 0xf0) || |
- ((cur[3] & 0xc0) != 0x80)) |
- goto encoding_error; |
- /* 4-byte code */ |
- ctxt->input->cur += 4; |
- val = (cur[0] & 0x7) << 18; |
- val |= (cur[1] & 0x3f) << 12; |
- val |= (cur[2] & 0x3f) << 6; |
- val |= cur[3] & 0x3f; |
- } else { |
- /* 3-byte code */ |
- ctxt->input->cur += 3; |
- val = (cur[0] & 0xf) << 12; |
- val |= (cur[1] & 0x3f) << 6; |
- val |= cur[2] & 0x3f; |
- } |
- if (((val > 0xd7ff) && (val < 0xe000)) || |
- ((val > 0xfffd) && (val < 0x10000)) || |
- (val >= 0x110000)) { |
- xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, |
- "Char 0x%X out of allowed range\n", |
- val); |
- } |
- } else |
- /* 2-byte code */ |
- ctxt->input->cur += 2; |
+ /* 4-byte code */ |
+ ctxt->input->cur += 4; |
+ val = (cur[0] & 0x7) << 18; |
+ val |= (cur[1] & 0x3f) << 12; |
+ val |= (cur[2] & 0x3f) << 6; |
+ val |= cur[3] & 0x3f; |
+ } else { |
+ /* 3-byte code */ |
+ ctxt->input->cur += 3; |
+ val = (cur[0] & 0xf) << 12; |
+ val |= (cur[1] & 0x3f) << 6; |
+ val |= cur[2] & 0x3f; |
+ } |
+ if (((val > 0xd7ff) && (val < 0xe000)) || |
+ ((val > 0xfffd) && (val < 0x10000)) || |
+ (val >= 0x110000)) { |
+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, |
+ "Char 0x%X out of allowed range\n", |
+ val); |
+ } |
} else |
- /* 1-byte code */ |
- ctxt->input->cur++; |
+ /* 2-byte code */ |
+ ctxt->input->cur += 2; |
+ } else |
+ /* 1-byte code */ |
+ ctxt->input->cur++; |
- ctxt->nbChars++; |
- if (*ctxt->input->cur == 0) |
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
- } |
+ ctxt->nbChars++; |
+ if (*ctxt->input->cur == 0) |
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
} else { |
/* |
* Assume it's a fixed length encoding (1) with |