| Index: third_party/libxml/src/HTMLparser.c
|
| diff --git a/third_party/libxml/src/HTMLparser.c b/third_party/libxml/src/HTMLparser.c
|
| index 42dc776ae587e03d38416dafe072e563d4040683..d329d3b54076124bf130815f59e5bef13b062d3d 100644
|
| --- a/third_party/libxml/src/HTMLparser.c
|
| +++ b/third_party/libxml/src/HTMLparser.c
|
| @@ -44,6 +44,9 @@
|
| #include <libxml/globals.h>
|
| #include <libxml/uri.h>
|
|
|
| +#include "buf.h"
|
| +#include "enc.h"
|
| +
|
| #define HTML_MAX_NAMELEN 1000
|
| #define HTML_PARSER_BIG_BUFFER_SIZE 1000
|
| #define HTML_PARSER_BUFFER_SIZE 100
|
| @@ -727,7 +730,7 @@ static const char* const map_contents[] = { BLOCK, "area", NULL } ;
|
| static const char* const name_attr[] = { "name", NULL } ;
|
| static const char* const action_attr[] = { "action", NULL } ;
|
| static const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
|
| -static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ;
|
| +static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ;
|
| static const char* const content_attr[] = { "content", NULL } ;
|
| static const char* const type_attr[] = { "type", NULL } ;
|
| static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
|
| @@ -1080,9 +1083,9 @@ static const char * const htmlStartClose[] = {
|
| "menu", "p", "head", "ul", NULL,
|
| "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
|
| "div", "p", "head", NULL,
|
| -"noscript", "p", "head", NULL,
|
| +"noscript", "p", NULL,
|
| "center", "font", "b", "i", "p", "head", NULL,
|
| -"a", "a", NULL,
|
| +"a", "a", "head", NULL,
|
| "caption", "p", NULL,
|
| "colgroup", "caption", "colgroup", "col", "p", NULL,
|
| "col", "caption", "col", "p", NULL,
|
| @@ -1100,6 +1103,43 @@ static const char * const htmlStartClose[] = {
|
| "option", "option", NULL,
|
| "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
|
| "pre", "listing", "xmp", "a", NULL,
|
| +/* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */
|
| +"tt", "head", NULL,
|
| +"i", "head", NULL,
|
| +"b", "head", NULL,
|
| +"u", "head", NULL,
|
| +"s", "head", NULL,
|
| +"strike", "head", NULL,
|
| +"big", "head", NULL,
|
| +"small", "head", NULL,
|
| +
|
| +"em", "head", NULL,
|
| +"strong", "head", NULL,
|
| +"dfn", "head", NULL,
|
| +"code", "head", NULL,
|
| +"samp", "head", NULL,
|
| +"kbd", "head", NULL,
|
| +"var", "head", NULL,
|
| +"cite", "head", NULL,
|
| +"abbr", "head", NULL,
|
| +"acronym", "head", NULL,
|
| +
|
| +/* "a" */
|
| +"img", "head", NULL,
|
| +/* "applet" */
|
| +/* "embed" */
|
| +/* "object" */
|
| +"font", "head", NULL,
|
| +/* "basefont" */
|
| +"br", "head", NULL,
|
| +/* "script" */
|
| +"map", "head", NULL,
|
| +"q", "head", NULL,
|
| +"sub", "head", NULL,
|
| +"sup", "head", NULL,
|
| +"span", "head", NULL,
|
| +"bdo", "head", NULL,
|
| +"iframe", "head", NULL,
|
| NULL
|
| };
|
|
|
| @@ -1137,7 +1177,7 @@ static const char *const htmlScriptAttributes[] = {
|
| "onfocus",
|
| "onblur",
|
| "onsubmit",
|
| - "onrest",
|
| + "onreset",
|
| "onchange",
|
| "onselect"
|
| };
|
| @@ -2887,9 +2927,11 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
|
| }
|
|
|
| if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
|
| - htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
|
| - "Invalid char in CDATA 0x%X\n", cur);
|
| - NEXT;
|
| + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
|
| + "Invalid char in CDATA 0x%X\n", cur);
|
| + if (ctxt->input->cur < ctxt->input->end) {
|
| + NEXT;
|
| + }
|
| }
|
|
|
| if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
| @@ -2939,9 +2981,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
| */
|
| if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
| if (areBlanks(ctxt, buf, nbchar)) {
|
| - if (ctxt->sax->ignorableWhitespace != NULL)
|
| - ctxt->sax->ignorableWhitespace(ctxt->userData,
|
| - buf, nbchar);
|
| + if (ctxt->keepBlanks) {
|
| + if (ctxt->sax->characters != NULL)
|
| + ctxt->sax->characters(ctxt->userData, buf, nbchar);
|
| + } else {
|
| + if (ctxt->sax->ignorableWhitespace != NULL)
|
| + ctxt->sax->ignorableWhitespace(ctxt->userData,
|
| + buf, nbchar);
|
| + }
|
| } else {
|
| htmlCheckParagraph(ctxt);
|
| if (ctxt->sax->characters != NULL)
|
| @@ -2972,8 +3019,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
| */
|
| if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
| if (areBlanks(ctxt, buf, nbchar)) {
|
| - if (ctxt->sax->ignorableWhitespace != NULL)
|
| - ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
|
| + if (ctxt->keepBlanks) {
|
| + if (ctxt->sax->characters != NULL)
|
| + ctxt->sax->characters(ctxt->userData, buf, nbchar);
|
| + } else {
|
| + if (ctxt->sax->ignorableWhitespace != NULL)
|
| + ctxt->sax->ignorableWhitespace(ctxt->userData,
|
| + buf, nbchar);
|
| + }
|
| } else {
|
| htmlCheckParagraph(ctxt);
|
| if (ctxt->sax->characters != NULL)
|
| @@ -3275,7 +3328,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {
|
| val = val * 16 + (CUR - 'A') + 10;
|
| else {
|
| htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
|
| - "htmlParseCharRef: missing semicolumn\n",
|
| + "htmlParseCharRef: missing semicolon\n",
|
| NULL, NULL);
|
| break;
|
| }
|
| @@ -3290,7 +3343,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {
|
| val = val * 10 + (CUR - '0');
|
| else {
|
| htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
|
| - "htmlParseCharRef: missing semicolumn\n",
|
| + "htmlParseCharRef: missing semicolon\n",
|
| NULL, NULL);
|
| break;
|
| }
|
| @@ -3433,34 +3486,26 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
|
| }
|
|
|
| /**
|
| - * htmlCheckEncoding:
|
| + * htmlCheckEncodingDirect:
|
| * @ctxt: an HTML parser context
|
| * @attvalue: the attribute value
|
| *
|
| - * Checks an http-equiv attribute from a Meta tag to detect
|
| + * Checks an attribute value to detect
|
| * the encoding
|
| * If a new encoding is detected the parser is switched to decode
|
| * it and pass UTF8
|
| */
|
| static void
|
| -htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
|
| - const xmlChar *encoding;
|
| +htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
|
|
|
| - if ((ctxt == NULL) || (attvalue == NULL))
|
| + if ((ctxt == NULL) || (encoding == NULL) ||
|
| + (ctxt->options & HTML_PARSE_IGNORE_ENC))
|
| return;
|
|
|
| /* do not change encoding */
|
| if (ctxt->input->encoding != NULL)
|
| return;
|
|
|
| - encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");
|
| - if (encoding != NULL) {
|
| - encoding += 8;
|
| - } else {
|
| - encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");
|
| - if (encoding != NULL)
|
| - encoding += 9;
|
| - }
|
| if (encoding != NULL) {
|
| xmlCharEncoding enc;
|
| xmlCharEncodingHandlerPtr handler;
|
| @@ -3498,7 +3543,9 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
|
| xmlSwitchToEncoding(ctxt, handler);
|
| ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
| } else {
|
| - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
| + htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
| + "htmlCheckEncoding: unknown encoding %s\n",
|
| + encoding, NULL);
|
| }
|
| }
|
|
|
| @@ -3513,24 +3560,51 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
|
| * convert as much as possible to the parser reading buffer.
|
| */
|
| processed = ctxt->input->cur - ctxt->input->base;
|
| - xmlBufferShrink(ctxt->input->buf->buffer, processed);
|
| - nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
| - ctxt->input->buf->buffer,
|
| - ctxt->input->buf->raw);
|
| + xmlBufShrink(ctxt->input->buf->buffer, processed);
|
| + nbchars = xmlCharEncInput(ctxt->input->buf, 1);
|
| if (nbchars < 0) {
|
| htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
|
| "htmlCheckEncoding: encoder error\n",
|
| NULL, NULL);
|
| }
|
| - ctxt->input->base =
|
| - ctxt->input->cur = ctxt->input->buf->buffer->content;
|
| - ctxt->input->end =
|
| - &ctxt->input->base[ctxt->input->buf->buffer->use];
|
| + xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
|
| }
|
| }
|
| }
|
|
|
| /**
|
| + * htmlCheckEncoding:
|
| + * @ctxt: an HTML parser context
|
| + * @attvalue: the attribute value
|
| + *
|
| + * Checks an http-equiv attribute from a Meta tag to detect
|
| + * the encoding
|
| + * If a new encoding is detected the parser is switched to decode
|
| + * it and pass UTF8
|
| + */
|
| +static void
|
| +htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
|
| + const xmlChar *encoding; /* cursor into attvalue, NULL if no "charset" */
|
| +
|
| + if (!attvalue)
|
| + return;
|
| +
|
| + encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
|
| + if (encoding != NULL) {
|
| + encoding += 7; /* step over the 7 characters of "charset" */
|
| + }
|
| + /*
|
| + * skip blanks between "charset" and '=', moving forward only
|
| + */
|
| + while (encoding && IS_BLANK_CH(*encoding))
|
| + encoding++;
|
| + if (encoding && *encoding == '=') {
|
| + encoding ++; /* the encoding name starts right after '=' */
|
| + htmlCheckEncodingDirect(ctxt, encoding);
|
| + }
|
| +}
|
| +
|
| +/**
|
| * htmlCheckMeta:
|
| * @ctxt: an HTML parser context
|
| * @atts: the attributes values
|
| @@ -3554,6 +3628,8 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
|
| if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
|
| && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
|
| http = 1;
|
| + else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset")))
|
| + htmlCheckEncodingDirect(ctxt, value);
|
| else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
|
| content = value;
|
| att = atts[i++];
|
| @@ -3595,13 +3671,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
| int i;
|
| int discardtag = 0;
|
|
|
| - if (ctxt->instate == XML_PARSER_EOF)
|
| - return(-1);
|
| if ((ctxt == NULL) || (ctxt->input == NULL)) {
|
| htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
|
| "htmlParseStartTag: context error\n", NULL, NULL);
|
| return -1;
|
| }
|
| + if (ctxt->instate == XML_PARSER_EOF)
|
| + return(-1);
|
| if (CUR != '<') return -1;
|
| NEXT;
|
|
|
| @@ -3883,6 +3959,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
| if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
|
| if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
|
| ctxt->sax->endElement(ctxt->userData, name);
|
| + htmlNodeInfoPop(ctxt);
|
| htmlnamePop(ctxt);
|
| ret = 1;
|
| } else {
|
| @@ -4289,7 +4366,7 @@ static void
|
| htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
|
| const xmlChar *name;
|
| const htmlElemDesc * info;
|
| - htmlParserNodeInfo node_info;
|
| + htmlParserNodeInfo node_info = { 0, };
|
| int failed;
|
|
|
| if ((ctxt == NULL) || (ctxt->input == NULL)) {
|
| @@ -4670,7 +4747,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
| if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
|
| ctxt->sax->endDocument(ctxt->userData);
|
|
|
| - if (ctxt->myDoc != NULL) {
|
| + if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
|
| dtd = xmlGetIntSubset(ctxt->myDoc);
|
| if (dtd == NULL)
|
| ctxt->myDoc->intSubset =
|
| @@ -4875,9 +4952,7 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) {
|
|
|
| input->filename = NULL;
|
| input->buf = buf;
|
| - input->base = input->buf->buffer->content;
|
| - input->cur = input->buf->buffer->content;
|
| - input->end = &input->buf->buffer->content[input->buf->buffer->use];
|
| + xmlBufResetInput(buf->buffer, input);
|
|
|
| inputPush(ctxt, input);
|
| return(ctxt);
|
| @@ -4994,8 +5069,8 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
|
| buf = in->base;
|
| len = in->length;
|
| } else {
|
| - buf = in->buf->buffer->content;
|
| - len = in->buf->buffer->use;
|
| + buf = xmlBufContent(in->buf->buffer);
|
| + len = xmlBufUse(in->buf->buffer);
|
| }
|
|
|
| /* take into account the sequence length */
|
| @@ -5087,13 +5162,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
|
| * @stop: Array of chars, which stop the lookup.
|
| * @stopLen: Length of stop-Array
|
| *
|
| - * Try to find if any char of the stop-Array is available in the input
|
| + * Try to find if any char of the stop-Array is available in the input
|
| * stream.
|
| * This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
| * to avoid rescanning sequences of bytes, it DOES change the state of the
|
| * parser, do not use liberally.
|
| *
|
| - * Returns the index to the current parsing point if a stopChar
|
| + * Returns the index to the current parsing point if a stopChar
|
| * is available, -1 otherwise.
|
| */
|
| static int
|
| @@ -5121,8 +5196,8 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
|
| buf = in->base;
|
| len = in->length;
|
| } else {
|
| - buf = in->buf->buffer->content;
|
| - len = in->buf->buffer->use;
|
| + buf = xmlBufContent(in->buf->buffer);
|
| + len = xmlBufUse(in->buf->buffer);
|
| }
|
|
|
| for (; base < len; base++) {
|
| @@ -5171,6 +5246,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| int avail = 0;
|
| xmlChar cur, next;
|
|
|
| + htmlParserNodeInfo node_info = { 0, }; /* zeroed: only begin_pos/begin_line are assigned before htmlNodeInfoPush */
|
| +
|
| #ifdef DEBUG_PUSH
|
| switch (ctxt->instate) {
|
| case XML_PARSER_EOF:
|
| @@ -5231,7 +5308,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| if (in->buf == NULL)
|
| avail = in->length - (in->cur - in->base);
|
| else
|
| - avail = in->buf->buffer->use - (in->cur - in->base);
|
| + avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
|
| if ((avail == 0) && (terminate)) {
|
| htmlAutoCloseOnEnd(ctxt);
|
| if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
|
| @@ -5267,7 +5344,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| if (in->buf == NULL)
|
| avail = in->length - (in->cur - in->base);
|
| else
|
| - avail = in->buf->buffer->use - (in->cur - in->base);
|
| + avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
|
| }
|
| if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
|
| ctxt->sax->setDocumentLocator(ctxt->userData,
|
| @@ -5309,11 +5386,24 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| if (in->buf == NULL)
|
| avail = in->length - (in->cur - in->base);
|
| else
|
| - avail = in->buf->buffer->use - (in->cur - in->base);
|
| - if (avail < 2)
|
| + avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
|
| + /*
|
| + * no chars in buffer
|
| + */
|
| + if (avail < 1)
|
| goto done;
|
| + /*
|
| + * not enough chars in buffer
|
| + */
|
| + if (avail < 2) {
|
| + if (!terminate)
|
| + goto done;
|
| + else
|
| + next = ' ';
|
| + } else {
|
| + next = in->cur[1];
|
| + }
|
| cur = in->cur[0];
|
| - next = in->cur[1];
|
| if ((cur == '<') && (next == '!') &&
|
| (in->cur[2] == '-') && (in->cur[3] == '-')) {
|
| if ((!terminate) &&
|
| @@ -5369,7 +5459,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| if (in->buf == NULL)
|
| avail = in->length - (in->cur - in->base);
|
| else
|
| - avail = in->buf->buffer->use - (in->cur - in->base);
|
| + avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
|
| if (avail < 2)
|
| goto done;
|
| cur = in->cur[0];
|
| @@ -5410,7 +5500,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| if (in->buf == NULL)
|
| avail = in->length - (in->cur - in->base);
|
| else
|
| - avail = in->buf->buffer->use - (in->cur - in->base);
|
| + avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
|
| if (avail < 1)
|
| goto done;
|
| cur = in->cur[0];
|
| @@ -5463,8 +5553,22 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| int failed;
|
| const htmlElemDesc * info;
|
|
|
| - if (avail < 2)
|
| + /*
|
| + * no chars in buffer
|
| + */
|
| + if (avail < 1)
|
| goto done;
|
| + /*
|
| + * not enough chars in buffer
|
| + */
|
| + if (avail < 2) {
|
| + if (!terminate)
|
| + goto done;
|
| + else
|
| + next = ' ';
|
| + } else {
|
| + next = in->cur[1];
|
| + }
|
| cur = in->cur[0];
|
| if (cur != '<') {
|
| ctxt->instate = XML_PARSER_CONTENT;
|
| @@ -5474,7 +5578,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| #endif
|
| break;
|
| }
|
| - if (in->cur[1] == '/') {
|
| + if (next == '/') {
|
| ctxt->instate = XML_PARSER_END_TAG;
|
| ctxt->checkIndex = 0;
|
| #ifdef DEBUG_PUSH
|
| @@ -5487,6 +5591,14 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
|
| goto done;
|
|
|
| + /* Capture start position */
|
| + if (ctxt->record_info) {
|
| + node_info.begin_pos = ctxt->input->consumed +
|
| + (CUR_PTR - ctxt->input->base);
|
| + node_info.begin_line = ctxt->input->line;
|
| + }
|
| +
|
| +
|
| failed = htmlParseStartTag(ctxt);
|
| name = ctxt->name;
|
| if ((failed == -1) ||
|
| @@ -5536,6 +5648,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| htmlnamePop(ctxt);
|
| }
|
|
|
| + if (ctxt->record_info)
|
| + htmlNodeInfoPush(ctxt, &node_info);
|
| +
|
| ctxt->instate = XML_PARSER_CONTENT;
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| @@ -5552,6 +5667,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| ctxt->sax->endElement(ctxt->userData, name);
|
| htmlnamePop(ctxt);
|
| }
|
| +
|
| + if (ctxt->record_info)
|
| + htmlNodeInfoPush(ctxt, &node_info);
|
| +
|
| ctxt->instate = XML_PARSER_CONTENT;
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| @@ -5579,9 +5698,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| if ((cur != '<') && (cur != '&')) {
|
| if (ctxt->sax != NULL) {
|
| if (IS_BLANK_CH(cur)) {
|
| - if (ctxt->sax->ignorableWhitespace != NULL)
|
| - ctxt->sax->ignorableWhitespace(
|
| - ctxt->userData, &cur, 1);
|
| + if (ctxt->keepBlanks) {
|
| + if (ctxt->sax->characters != NULL)
|
| + ctxt->sax->characters(
|
| + ctxt->userData, &cur, 1);
|
| + } else {
|
| + if (ctxt->sax->ignorableWhitespace != NULL)
|
| + ctxt->sax->ignorableWhitespace(
|
| + ctxt->userData, &cur, 1);
|
| + }
|
| } else {
|
| htmlCheckParagraph(ctxt);
|
| if (ctxt->sax->characters != NULL)
|
| @@ -5609,7 +5734,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
| int idx;
|
| xmlChar val;
|
|
|
| - idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1);
|
| + idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0);
|
| if (idx < 0)
|
| goto done;
|
| val = in->cur[idx + 2];
|
| @@ -5866,7 +5991,7 @@ done:
|
| ctxt->sax->endDocument(ctxt->userData);
|
| }
|
| }
|
| - if ((ctxt->myDoc != NULL) &&
|
| + if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&
|
| ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
|
| (ctxt->instate == XML_PARSER_EPILOG))) {
|
| xmlDtdPtr dtd;
|
| @@ -5904,8 +6029,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
| }
|
| if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
| (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
|
| - int base = ctxt->input->base - ctxt->input->buf->buffer->content;
|
| - int cur = ctxt->input->cur - ctxt->input->base;
|
| + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
|
| + size_t cur = ctxt->input->cur - ctxt->input->base;
|
| int res;
|
|
|
| res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
| @@ -5914,10 +6039,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
| ctxt->disableSAX = 1;
|
| return (XML_PARSER_EOF);
|
| }
|
| - ctxt->input->base = ctxt->input->buf->buffer->content + base;
|
| - ctxt->input->cur = ctxt->input->base + cur;
|
| - ctxt->input->end =
|
| - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
|
| + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
|
| #endif
|
| @@ -5932,13 +6054,16 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
| if ((in->encoder != NULL) && (in->buffer != NULL) &&
|
| (in->raw != NULL)) {
|
| int nbchars;
|
| + size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
|
| + size_t current = ctxt->input->cur - ctxt->input->base;
|
|
|
| - nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
|
| + nbchars = xmlCharEncInput(in, terminate);
|
| if (nbchars < 0) {
|
| htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
|
| "encoder error\n", NULL, NULL);
|
| return(XML_ERR_INVALID_ENCODING);
|
| }
|
| + xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
|
| }
|
| }
|
| }
|
| @@ -6032,24 +6157,18 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
|
| inputStream->filename = (char *)
|
| xmlCanonicPath((const xmlChar *) filename);
|
| inputStream->buf = buf;
|
| - inputStream->base = inputStream->buf->buffer->content;
|
| - inputStream->cur = inputStream->buf->buffer->content;
|
| - inputStream->end =
|
| - &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
|
| + xmlBufResetInput(buf->buffer, inputStream);
|
|
|
| inputPush(ctxt, inputStream);
|
|
|
| if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
| (ctxt->input->buf != NULL)) {
|
| - int base = ctxt->input->base - ctxt->input->buf->buffer->content;
|
| - int cur = ctxt->input->cur - ctxt->input->base;
|
| + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
|
| + size_t cur = ctxt->input->cur - ctxt->input->base;
|
|
|
| xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
|
|
| - ctxt->input->base = ctxt->input->buf->buffer->content + base;
|
| - ctxt->input->cur = ctxt->input->base + cur;
|
| - ctxt->input->end =
|
| - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
|
| + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
|
| #endif
|
| @@ -6169,12 +6288,16 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding)
|
|
|
| /* set encoding */
|
| if (encoding) {
|
| - content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);
|
| - if (content) {
|
| - strcpy ((char *)content, (char *)content_line);
|
| - strcat ((char *)content, (char *)encoding);
|
| - htmlCheckEncoding (ctxt, content);
|
| - xmlFree (content);
|
| + size_t l = strlen(encoding);
|
| +
|
| + if (l < 1000) {
|
| + content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1);
|
| + if (content) {
|
| + strcpy ((char *)content, (char *)content_line);
|
| + strcat ((char *)content, (char *)encoding);
|
| + htmlCheckEncoding (ctxt, content);
|
| + xmlFree (content);
|
| + }
|
| }
|
| }
|
|
|
| @@ -6451,6 +6574,7 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
|
|
|
| ctxt->wellFormed = 1;
|
| ctxt->nsWellFormed = 1;
|
| + ctxt->disableSAX = 0;
|
| ctxt->valid = 1;
|
| ctxt->vctxt.userData = ctxt;
|
| ctxt->vctxt.error = xmlParserValidityError;
|
| @@ -6530,6 +6654,18 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
|
| ctxt->options |= XML_PARSE_HUGE;
|
| options -= XML_PARSE_HUGE;
|
| }
|
| + if (options & HTML_PARSE_NODEFDTD) {
|
| + ctxt->options |= HTML_PARSE_NODEFDTD;
|
| + options -= HTML_PARSE_NODEFDTD;
|
| + }
|
| + if (options & HTML_PARSE_IGNORE_ENC) {
|
| + ctxt->options |= HTML_PARSE_IGNORE_ENC;
|
| + options -= HTML_PARSE_IGNORE_ENC;
|
| + }
|
| + if (options & HTML_PARSE_NOIMPLIED) {
|
| + ctxt->options |= HTML_PARSE_NOIMPLIED;
|
| + options -= HTML_PARSE_NOIMPLIED;
|
| + }
|
| ctxt->dictNames = 0;
|
| return (options);
|
| }
|
| @@ -6676,6 +6812,7 @@ htmlReadFd(int fd, const char *URL, const char *encoding, int options)
|
|
|
| if (fd < 0)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| xmlInitParser();
|
| input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
| @@ -6723,8 +6860,11 @@ htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
|
|
|
| input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
| XML_CHAR_ENCODING_NONE);
|
| - if (input == NULL)
|
| + if (input == NULL) {
|
| + if (ioclose != NULL)
|
| + ioclose(ioctx);
|
| return (NULL);
|
| + }
|
| ctxt = htmlNewParserCtxt();
|
| if (ctxt == NULL) {
|
| xmlFreeParserInputBuffer(input);
|
| @@ -6763,6 +6903,7 @@ htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| htmlCtxtReset(ctxt);
|
|
|
| @@ -6796,6 +6937,7 @@ htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| htmlCtxtReset(ctxt);
|
|
|
| @@ -6832,6 +6974,7 @@ htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
|
| return (NULL);
|
| if (buffer == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| htmlCtxtReset(ctxt);
|
|
|
| @@ -6874,6 +7017,7 @@ htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| htmlCtxtReset(ctxt);
|
|
|
| @@ -6918,13 +7062,17 @@ htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| htmlCtxtReset(ctxt);
|
|
|
| input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
| XML_CHAR_ENCODING_NONE);
|
| - if (input == NULL)
|
| + if (input == NULL) {
|
| + if (ioclose != NULL)
|
| + ioclose(ioctx);
|
| return (NULL);
|
| + }
|
| stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
| if (stream == NULL) {
|
| xmlFreeParserInputBuffer(input);
|
|
|