Index: third_party/libxml/src/HTMLparser.c |
diff --git a/third_party/libxml/src/HTMLparser.c b/third_party/libxml/src/HTMLparser.c |
index 69eed2bd3ed5ac3954e77cc23fa403ab9ef86999..d1395fa507c7cdcb78e468fe3d793edf5f424536 100644 |
--- a/third_party/libxml/src/HTMLparser.c |
+++ b/third_party/libxml/src/HTMLparser.c |
@@ -105,7 +105,7 @@ htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) |
* |
* Handle a fatal parser error, i.e. violating Well-Formedness constraints |
*/ |
-static void |
+static void LIBXML_ATTR_FORMAT(3,0) |
htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
const char *msg, const xmlChar *str1, const xmlChar *str2) |
{ |
@@ -132,7 +132,7 @@ htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
* |
* Handle a fatal parser error, i.e. violating Well-Formedness constraints |
*/ |
-static void |
+static void LIBXML_ATTR_FORMAT(3,0) |
htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
const char *msg, int val) |
{ |
@@ -303,6 +303,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) |
#define UPP(val) (toupper(ctxt->input->cur[(val)])) |
#define CUR_PTR ctxt->input->cur |
+#define BASE_PTR ctxt->input->base |
#define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ |
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ |
@@ -2471,6 +2472,10 @@ htmlParseName(htmlParserCtxtPtr ctxt) { |
(*in == '_') || (*in == '-') || |
(*in == ':') || (*in == '.')) |
in++; |
+ |
+ if (in == ctxt->input->end) |
+ return(NULL); |
+ |
if ((*in > 0) && (*in < 0x80)) { |
count = in - ctxt->input->cur; |
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); |
@@ -2488,6 +2493,7 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) { |
int len = 0, l; |
int c; |
int count = 0; |
+ const xmlChar *base = ctxt->input->base; |
/* |
* Handler for more complex cases |
@@ -2513,7 +2519,18 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) { |
len += l; |
NEXTL(l); |
c = CUR_CHAR(l); |
+ if (ctxt->input->base != base) { |
+ /* |
+ * The encoding was switched away from an unknown encoding, |
+ * so the input buffer changed location; start parsing again. |
+ */ |
+ return(htmlParseNameComplex(ctxt)); |
+ } |
} |
+ |
+ if (ctxt->input->base > ctxt->input->cur - len) |
+ return(NULL); |
+ |
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
} |
@@ -2765,31 +2782,43 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) { |
static xmlChar * |
htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { |
- const xmlChar *q; |
+ size_t len = 0, startPosition = 0; |
xmlChar *ret = NULL; |
if (CUR == '"') { |
NEXT; |
- q = CUR_PTR; |
- while ((IS_CHAR_CH(CUR)) && (CUR != '"')) |
+ |
+ if (CUR_PTR < BASE_PTR) |
+ return(ret); |
+ startPosition = CUR_PTR - BASE_PTR; |
+ |
+ while ((IS_CHAR_CH(CUR)) && (CUR != '"')) { |
NEXT; |
+ len++; |
+ } |
if (!IS_CHAR_CH(CUR)) { |
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
"Unfinished SystemLiteral\n", NULL, NULL); |
} else { |
- ret = xmlStrndup(q, CUR_PTR - q); |
+ ret = xmlStrndup((BASE_PTR+startPosition), len); |
NEXT; |
} |
} else if (CUR == '\'') { |
NEXT; |
- q = CUR_PTR; |
- while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) |
+ |
+ if (CUR_PTR < BASE_PTR) |
+ return(ret); |
+ startPosition = CUR_PTR - BASE_PTR; |
+ |
+ while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) { |
NEXT; |
+ len++; |
+ } |
if (!IS_CHAR_CH(CUR)) { |
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
"Unfinished SystemLiteral\n", NULL, NULL); |
} else { |
- ret = xmlStrndup(q, CUR_PTR - q); |
+ ret = xmlStrndup((BASE_PTR+startPosition), len); |
NEXT; |
} |
} else { |
@@ -2813,32 +2842,47 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { |
static xmlChar * |
htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { |
- const xmlChar *q; |
+ size_t len = 0, startPosition = 0; |
xmlChar *ret = NULL; |
/* |
* Name ::= (Letter | '_') (NameChar)* |
*/ |
if (CUR == '"') { |
NEXT; |
- q = CUR_PTR; |
- while (IS_PUBIDCHAR_CH(CUR)) NEXT; |
+ |
+ if (CUR_PTR < BASE_PTR) |
+ return(ret); |
+ startPosition = CUR_PTR - BASE_PTR; |
+ |
+ while (IS_PUBIDCHAR_CH(CUR)) { |
+ len++; |
+ NEXT; |
+ } |
+ |
if (CUR != '"') { |
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
"Unfinished PubidLiteral\n", NULL, NULL); |
} else { |
- ret = xmlStrndup(q, CUR_PTR - q); |
+ ret = xmlStrndup((BASE_PTR + startPosition), len); |
NEXT; |
} |
} else if (CUR == '\'') { |
NEXT; |
- q = CUR_PTR; |
- while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')) |
- NEXT; |
+ |
+ if (CUR_PTR < BASE_PTR) |
+ return(ret); |
+ startPosition = CUR_PTR - BASE_PTR; |
+ |
+ while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){ |
+ len++; |
+ NEXT; |
+ } |
+ |
if (CUR != '\'') { |
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, |
"Unfinished PubidLiteral\n", NULL, NULL); |
} else { |
- ret = xmlStrndup(q, CUR_PTR - q); |
+ ret = xmlStrndup((BASE_PTR + startPosition), len); |
NEXT; |
} |
} else { |