| Index: third_party/libxml/src/HTMLparser.c
|
| diff --git a/third_party/libxml/src/HTMLparser.c b/third_party/libxml/src/HTMLparser.c
|
| index 69eed2bd3ed5ac3954e77cc23fa403ab9ef86999..d1395fa507c7cdcb78e468fe3d793edf5f424536 100644
|
| --- a/third_party/libxml/src/HTMLparser.c
|
| +++ b/third_party/libxml/src/HTMLparser.c
|
| @@ -105,7 +105,7 @@ htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
|
| *
|
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints
|
| */
|
| -static void
|
| +static void LIBXML_ATTR_FORMAT(3,0)
|
| htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| const char *msg, const xmlChar *str1, const xmlChar *str2)
|
| {
|
| @@ -132,7 +132,7 @@ htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| *
|
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints
|
| */
|
| -static void
|
| +static void LIBXML_ATTR_FORMAT(3,0)
|
| htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| const char *msg, int val)
|
| {
|
| @@ -303,6 +303,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
|
| #define UPP(val) (toupper(ctxt->input->cur[(val)]))
|
|
|
| #define CUR_PTR ctxt->input->cur
|
| +#define BASE_PTR ctxt->input->base
|
|
|
| #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
|
| (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
|
| @@ -2471,6 +2472,10 @@ htmlParseName(htmlParserCtxtPtr ctxt) {
|
| (*in == '_') || (*in == '-') ||
|
| (*in == ':') || (*in == '.'))
|
| in++;
|
| +
|
| + if (in == ctxt->input->end)
|
| + return(NULL);
|
| +
|
| if ((*in > 0) && (*in < 0x80)) {
|
| count = in - ctxt->input->cur;
|
| ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
|
| @@ -2488,6 +2493,7 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
|
| int len = 0, l;
|
| int c;
|
| int count = 0;
|
| + const xmlChar *base = ctxt->input->base;
|
|
|
| /*
|
| * Handler for more complex cases
|
| @@ -2513,7 +2519,18 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
|
| len += l;
|
| NEXTL(l);
|
| c = CUR_CHAR(l);
|
| + if (ctxt->input->base != base) {
|
| + /*
|
| + * We changed encoding from an unknown encoding
|
| + * Input buffer changed location, so we better start again
|
| + */
|
| + return(htmlParseNameComplex(ctxt));
|
| + }
|
| }
|
| +
|
| + if (ctxt->input->base > ctxt->input->cur - len)
|
| + return(NULL);
|
| +
|
| return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
|
| }
|
|
|
| @@ -2765,31 +2782,43 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) {
|
|
|
| static xmlChar *
|
| htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
|
| - const xmlChar *q;
|
| + size_t len = 0, startPosition = 0;
|
| xmlChar *ret = NULL;
|
|
|
| if (CUR == '"') {
|
| NEXT;
|
| - q = CUR_PTR;
|
| - while ((IS_CHAR_CH(CUR)) && (CUR != '"'))
|
| +
|
| + if (CUR_PTR < BASE_PTR)
|
| + return(ret);
|
| + startPosition = CUR_PTR - BASE_PTR;
|
| +
|
| + while ((IS_CHAR_CH(CUR)) && (CUR != '"')) {
|
| NEXT;
|
| + len++;
|
| + }
|
| if (!IS_CHAR_CH(CUR)) {
|
| htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
|
| "Unfinished SystemLiteral\n", NULL, NULL);
|
| } else {
|
| - ret = xmlStrndup(q, CUR_PTR - q);
|
| + ret = xmlStrndup((BASE_PTR+startPosition), len);
|
| NEXT;
|
| }
|
| } else if (CUR == '\'') {
|
| NEXT;
|
| - q = CUR_PTR;
|
| - while ((IS_CHAR_CH(CUR)) && (CUR != '\''))
|
| +
|
| + if (CUR_PTR < BASE_PTR)
|
| + return(ret);
|
| + startPosition = CUR_PTR - BASE_PTR;
|
| +
|
| + while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) {
|
| NEXT;
|
| + len++;
|
| + }
|
| if (!IS_CHAR_CH(CUR)) {
|
| htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
|
| "Unfinished SystemLiteral\n", NULL, NULL);
|
| } else {
|
| - ret = xmlStrndup(q, CUR_PTR - q);
|
| + ret = xmlStrndup((BASE_PTR+startPosition), len);
|
| NEXT;
|
| }
|
| } else {
|
| @@ -2813,32 +2842,47 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
|
|
|
| static xmlChar *
|
| htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
|
| - const xmlChar *q;
|
| + size_t len = 0, startPosition = 0;
|
| xmlChar *ret = NULL;
|
| /*
|
| * Name ::= (Letter | '_') (NameChar)*
|
| */
|
| if (CUR == '"') {
|
| NEXT;
|
| - q = CUR_PTR;
|
| - while (IS_PUBIDCHAR_CH(CUR)) NEXT;
|
| +
|
| + if (CUR_PTR < BASE_PTR)
|
| + return(ret);
|
| + startPosition = CUR_PTR - BASE_PTR;
|
| +
|
| + while (IS_PUBIDCHAR_CH(CUR)) {
|
| + len++;
|
| + NEXT;
|
| + }
|
| +
|
| if (CUR != '"') {
|
| htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
|
| "Unfinished PubidLiteral\n", NULL, NULL);
|
| } else {
|
| - ret = xmlStrndup(q, CUR_PTR - q);
|
| + ret = xmlStrndup((BASE_PTR + startPosition), len);
|
| NEXT;
|
| }
|
| } else if (CUR == '\'') {
|
| NEXT;
|
| - q = CUR_PTR;
|
| - while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\''))
|
| - NEXT;
|
| +
|
| + if (CUR_PTR < BASE_PTR)
|
| + return(ret);
|
| + startPosition = CUR_PTR - BASE_PTR;
|
| +
|
| + while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){
|
| + len++;
|
| + NEXT;
|
| + }
|
| +
|
| if (CUR != '\'') {
|
| htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
|
| "Unfinished PubidLiteral\n", NULL, NULL);
|
| } else {
|
| - ret = xmlStrndup(q, CUR_PTR - q);
|
| + ret = xmlStrndup((BASE_PTR + startPosition), len);
|
| NEXT;
|
| }
|
| } else {
|
|
|