third_party/libxml/src/HTMLparser.c - Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28

Unified Diff: third_party/libxml/src/HTMLparser.c

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: no iconv Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/libxml/src/HTMLparser.c

diff --git a/third_party/libxml/src/HTMLparser.c b/third_party/libxml/src/HTMLparser.c

index 42dc776ae587e03d38416dafe072e563d4040683..d329d3b54076124bf130815f59e5bef13b062d3d 100644

--- a/third_party/libxml/src/HTMLparser.c

+++ b/third_party/libxml/src/HTMLparser.c

@@ -44,6 +44,9 @@

#include <libxml/globals.h>

#include <libxml/uri.h>

+#include "buf.h"

+#include "enc.h"

#define HTML_MAX_NAMELEN 1000

#define HTML_PARSER_BIG_BUFFER_SIZE 1000

#define HTML_PARSER_BUFFER_SIZE 100

@@ -727,7 +730,7 @@ static const char* const map_contents[] = { BLOCK, "area", NULL } ;

static const char* const name_attr[] = { "name", NULL } ;

static const char* const action_attr[] = { "action", NULL } ;

static const char* const blockli_elt[] = { BLOCK, "li", NULL } ;

-static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ;

+static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ;

static const char* const content_attr[] = { "content", NULL } ;

static const char* const type_attr[] = { "type", NULL } ;

static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;

@@ -1080,9 +1083,9 @@ static const char * const htmlStartClose[] = {

"menu", "p", "head", "ul", NULL,

"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,

"div", "p", "head", NULL,

-"noscript", "p", "head", NULL,

+"noscript", "p", NULL,

"center", "font", "b", "i", "p", "head", NULL,

-"a", "a", NULL,

+"a", "a", "head", NULL,

"caption", "p", NULL,

"colgroup", "caption", "colgroup", "col", "p", NULL,

"col", "caption", "col", "p", NULL,

@@ -1100,6 +1103,43 @@ static const char * const htmlStartClose[] = {

"option", "option", NULL,

"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",

"pre", "listing", "xmp", "a", NULL,

+/* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */

+"tt", "head", NULL,

+"i", "head", NULL,

+"b", "head", NULL,

+"u", "head", NULL,

+"s", "head", NULL,

+"strike", "head", NULL,

+"big", "head", NULL,

+"small", "head", NULL,

+"em", "head", NULL,

+"strong", "head", NULL,

+"dfn", "head", NULL,

+"code", "head", NULL,

+"samp", "head", NULL,

+"kbd", "head", NULL,

+"var", "head", NULL,

+"cite", "head", NULL,

+"abbr", "head", NULL,

+"acronym", "head", NULL,

+/* "a" */

+"img", "head", NULL,

+/* "applet" */

+/* "embed" */

+/* "object" */

+"font", "head", NULL,

+/* "basefont" */

+"br", "head", NULL,

+/* "script" */

+"map", "head", NULL,

+"q", "head", NULL,

+"sub", "head", NULL,

+"sup", "head", NULL,

+"span", "head", NULL,

+"bdo", "head", NULL,

+"iframe", "head", NULL,

NULL

};

@@ -1137,7 +1177,7 @@ static const char *const htmlScriptAttributes[] = {

"onfocus",

"onblur",

"onsubmit",

- "onrest",

+ "onreset",

"onchange",

"onselect"

};

@@ -2887,9 +2927,11 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {

}

if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {

- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,

- "Invalid char in CDATA 0x%X\n", cur);

- NEXT;

+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,

+ "Invalid char in CDATA 0x%X\n", cur);

+ if (ctxt->input->cur < ctxt->input->end) {

+ NEXT;

+ }

}

if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {

@@ -2939,9 +2981,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {

if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {

if (areBlanks(ctxt, buf, nbchar)) {

- if (ctxt->sax->ignorableWhitespace != NULL)

- ctxt->sax->ignorableWhitespace(ctxt->userData,

- buf, nbchar);

+ if (ctxt->keepBlanks) {

+ if (ctxt->sax->characters != NULL)

+ ctxt->sax->characters(ctxt->userData, buf, nbchar);

+ } else {

+ if (ctxt->sax->ignorableWhitespace != NULL)

+ ctxt->sax->ignorableWhitespace(ctxt->userData,

+ buf, nbchar);

+ }

} else {

htmlCheckParagraph(ctxt);

if (ctxt->sax->characters != NULL)

@@ -2972,8 +3019,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {

if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {

if (areBlanks(ctxt, buf, nbchar)) {

- if (ctxt->sax->ignorableWhitespace != NULL)

- ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);

+ if (ctxt->keepBlanks) {

+ if (ctxt->sax->characters != NULL)

+ ctxt->sax->characters(ctxt->userData, buf, nbchar);

+ } else {

+ if (ctxt->sax->ignorableWhitespace != NULL)

+ ctxt->sax->ignorableWhitespace(ctxt->userData,

+ buf, nbchar);

+ }

} else {

htmlCheckParagraph(ctxt);

if (ctxt->sax->characters != NULL)

@@ -3275,7 +3328,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {

val = val * 16 + (CUR - 'A') + 10;

else {

htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,

- "htmlParseCharRef: missing semicolumn\n",

+ "htmlParseCharRef: missing semicolon\n",

NULL, NULL);

break;

}

@@ -3290,7 +3343,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {

val = val * 10 + (CUR - '0');

else {

htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,

- "htmlParseCharRef: missing semicolumn\n",

+ "htmlParseCharRef: missing semicolon\n",

NULL, NULL);

break;

}

@@ -3433,34 +3486,26 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {

}

/**

- * htmlCheckEncoding:

+ * htmlCheckEncodingDirect:

* @ctxt: an HTML parser context

* @attvalue: the attribute value

- * Checks an http-equiv attribute from a Meta tag to detect

+ * Checks an attribute value to detect

* the encoding

* If a new encoding is detected the parser is switched to decode

* it and pass UTF8

static void

-htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {

- const xmlChar *encoding;

+htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {

- if ((ctxt == NULL) || (attvalue == NULL))

+ if ((ctxt == NULL) || (encoding == NULL) ||

+ (ctxt->options & HTML_PARSE_IGNORE_ENC))

return;

/* do not change encoding */

if (ctxt->input->encoding != NULL)

return;

- encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");

- if (encoding != NULL) {

- encoding += 8;

- } else {

- encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");

- if (encoding != NULL)

- encoding += 9;

- }

if (encoding != NULL) {

xmlCharEncoding enc;

xmlCharEncodingHandlerPtr handler;

@@ -3498,7 +3543,9 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {

xmlSwitchToEncoding(ctxt, handler);

ctxt->charset = XML_CHAR_ENCODING_UTF8;

} else {

- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;

+ htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,

+ "htmlCheckEncoding: unknown encoding %s\n",

+ encoding, NULL);

}

@@ -3513,24 +3560,51 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {

* convert as much as possible to the parser reading buffer.

processed = ctxt->input->cur - ctxt->input->base;

- xmlBufferShrink(ctxt->input->buf->buffer, processed);

- nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,

- ctxt->input->buf->buffer,

- ctxt->input->buf->raw);

+ xmlBufShrink(ctxt->input->buf->buffer, processed);

+ nbchars = xmlCharEncInput(ctxt->input->buf, 1);

if (nbchars < 0) {

htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,

"htmlCheckEncoding: encoder error\n",

NULL, NULL);

}

- ctxt->input->base =

- ctxt->input->cur = ctxt->input->buf->buffer->content;

- ctxt->input->end =

- &ctxt->input->base[ctxt->input->buf->buffer->use];

+ xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);

}

/**

+ * htmlCheckEncoding:

+ * @ctxt: an HTML parser context

+ * @attvalue: the attribute value

+ *

+ * Checks an http-equiv attribute from a Meta tag to detect

+ * the encoding

+ * If a new encoding is detected the parser is switched to decode

+ * it and pass UTF8

+ */

+static void

+htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {

+ const xmlChar *encoding;

+ if (!attvalue)

+ return;

+ encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");

+ if (encoding != NULL) {

+ encoding += 7;

+ }

+ /*

+ * skip blank

+ */

+ if (encoding && IS_BLANK_CH(*encoding))

+ encoding = xmlStrcasestr(attvalue, BAD_CAST"=");

+ if (encoding && *encoding == '=') {

+ encoding ++;

+ htmlCheckEncodingDirect(ctxt, encoding);

+ }

+/**

* htmlCheckMeta:

* @ctxt: an HTML parser context

* @atts: the attributes values

@@ -3554,6 +3628,8 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {

if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))

&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))

http = 1;

+ else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset")))

+ htmlCheckEncodingDirect(ctxt, value);

else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))

content = value;

att = atts[i++];

@@ -3595,13 +3671,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {

int i;

int discardtag = 0;

- if (ctxt->instate == XML_PARSER_EOF)

- return(-1);

if ((ctxt == NULL) || (ctxt->input == NULL)) {

htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,

"htmlParseStartTag: context error\n", NULL, NULL);

return -1;

}

+ if (ctxt->instate == XML_PARSER_EOF)

+ return(-1);

if (CUR != '<') return -1;

NEXT;

@@ -3883,6 +3959,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)

if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {

if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))

ctxt->sax->endElement(ctxt->userData, name);

+ htmlNodeInfoPop(ctxt);

htmlnamePop(ctxt);

ret = 1;

} else {

@@ -4289,7 +4366,7 @@ static void

htmlParseElementInternal(htmlParserCtxtPtr ctxt) {

const xmlChar *name;

const htmlElemDesc * info;

- htmlParserNodeInfo node_info;

+ htmlParserNodeInfo node_info = { 0, };

int failed;

if ((ctxt == NULL) || (ctxt->input == NULL)) {

@@ -4670,7 +4747,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {

if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))

ctxt->sax->endDocument(ctxt->userData);

- if (ctxt->myDoc != NULL) {

+ if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {

dtd = xmlGetIntSubset(ctxt->myDoc);

if (dtd == NULL)

ctxt->myDoc->intSubset =

@@ -4875,9 +4952,7 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) {

input->filename = NULL;

input->buf = buf;

- input->base = input->buf->buffer->content;

- input->cur = input->buf->buffer->content;

- input->end = &input->buf->buffer->content[input->buf->buffer->use];

+ xmlBufResetInput(buf->buffer, input);

inputPush(ctxt, input);

return(ctxt);

@@ -4994,8 +5069,8 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,

buf = in->base;

len = in->length;

} else {

- buf = in->buf->buffer->content;

- len = in->buf->buffer->use;

+ buf = xmlBufContent(in->buf->buffer);

+ len = xmlBufUse(in->buf->buffer);

}

/* take into account the sequence length */

@@ -5087,13 +5162,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,

* @stop: Array of chars, which stop the lookup.

* @stopLen: Length of stop-Array

- * Try to find if any char of the stop-Array is available in the input

+ * Try to find if any char of the stop-Array is available in the input

* stream.

* This function has a side effect of (possibly) incrementing ctxt->checkIndex

* to avoid rescanning sequences of bytes, it DOES change the state of the

* parser, do not use liberally.

- * Returns the index to the current parsing point if a stopChar

+ * Returns the index to the current parsing point if a stopChar

* is available, -1 otherwise.

static int

@@ -5121,8 +5196,8 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,

buf = in->base;

len = in->length;

} else {

- buf = in->buf->buffer->content;

- len = in->buf->buffer->use;

+ buf = xmlBufContent(in->buf->buffer);

+ len = xmlBufUse(in->buf->buffer);

}

for (; base < len; base++) {

@@ -5171,6 +5246,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

int avail = 0;

xmlChar cur, next;

+ htmlParserNodeInfo node_info;

#ifdef DEBUG_PUSH

switch (ctxt->instate) {

case XML_PARSER_EOF:

@@ -5231,7 +5308,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

if (in->buf == NULL)

avail = in->length - (in->cur - in->base);

else

- avail = in->buf->buffer->use - (in->cur - in->base);

+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);

if ((avail == 0) && (terminate)) {

htmlAutoCloseOnEnd(ctxt);

if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {

@@ -5267,7 +5344,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

if (in->buf == NULL)

avail = in->length - (in->cur - in->base);

else

- avail = in->buf->buffer->use - (in->cur - in->base);

+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);

}

if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))

ctxt->sax->setDocumentLocator(ctxt->userData,

@@ -5309,11 +5386,24 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

if (in->buf == NULL)

avail = in->length - (in->cur - in->base);

else

- avail = in->buf->buffer->use - (in->cur - in->base);

- if (avail < 2)

+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);

+ /*

+ * no chars in buffer

+ */

+ if (avail < 1)

goto done;

+ /*

+ * not enough chars in buffer

+ */

+ if (avail < 2) {

+ if (!terminate)

+ goto done;

+ else

+ next = ' ';

+ } else {

+ next = in->cur[1];

+ }

cur = in->cur[0];

- next = in->cur[1];

if ((cur == '<') && (next == '!') &&

(in->cur[2] == '-') && (in->cur[3] == '-')) {

if ((!terminate) &&

@@ -5369,7 +5459,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

if (in->buf == NULL)

avail = in->length - (in->cur - in->base);

else

- avail = in->buf->buffer->use - (in->cur - in->base);

+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);

if (avail < 2)

goto done;

cur = in->cur[0];

@@ -5410,7 +5500,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

if (in->buf == NULL)

avail = in->length - (in->cur - in->base);

else

- avail = in->buf->buffer->use - (in->cur - in->base);

+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);

if (avail < 1)

goto done;

cur = in->cur[0];

@@ -5463,8 +5553,22 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

int failed;

const htmlElemDesc * info;

- if (avail < 2)

+ /*

+ * no chars in buffer

+ */

+ if (avail < 1)

goto done;

+ /*

+ * not enough chars in buffer

+ */

+ if (avail < 2) {

+ if (!terminate)

+ goto done;

+ else

+ next = ' ';

+ } else {

+ next = in->cur[1];

+ }

cur = in->cur[0];

if (cur != '<') {

ctxt->instate = XML_PARSER_CONTENT;

@@ -5474,7 +5578,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

#endif

break;

}

- if (in->cur[1] == '/') {

+ if (next == '/') {

ctxt->instate = XML_PARSER_END_TAG;

ctxt->checkIndex = 0;

#ifdef DEBUG_PUSH

@@ -5487,6 +5591,14 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))

goto done;

+ /* Capture start position */

+ if (ctxt->record_info) {

+ node_info.begin_pos = ctxt->input->consumed +

+ (CUR_PTR - ctxt->input->base);

+ node_info.begin_line = ctxt->input->line;

+ }

failed = htmlParseStartTag(ctxt);

name = ctxt->name;

if ((failed == -1) ||

@@ -5536,6 +5648,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

htmlnamePop(ctxt);

}

+ if (ctxt->record_info)

+ htmlNodeInfoPush(ctxt, &node_info);

ctxt->instate = XML_PARSER_CONTENT;

#ifdef DEBUG_PUSH

xmlGenericError(xmlGenericErrorContext,

@@ -5552,6 +5667,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

ctxt->sax->endElement(ctxt->userData, name);

htmlnamePop(ctxt);

}

+ if (ctxt->record_info)

+ htmlNodeInfoPush(ctxt, &node_info);

ctxt->instate = XML_PARSER_CONTENT;

#ifdef DEBUG_PUSH

xmlGenericError(xmlGenericErrorContext,

@@ -5579,9 +5698,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

if ((cur != '<') && (cur != '&')) {

if (ctxt->sax != NULL) {

if (IS_BLANK_CH(cur)) {

- if (ctxt->sax->ignorableWhitespace != NULL)

- ctxt->sax->ignorableWhitespace(

- ctxt->userData, &cur, 1);

+ if (ctxt->keepBlanks) {

+ if (ctxt->sax->characters != NULL)

+ ctxt->sax->characters(

+ ctxt->userData, &cur, 1);

+ } else {

+ if (ctxt->sax->ignorableWhitespace != NULL)

+ ctxt->sax->ignorableWhitespace(

+ ctxt->userData, &cur, 1);

+ }

} else {

htmlCheckParagraph(ctxt);

if (ctxt->sax->characters != NULL)

@@ -5609,7 +5734,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {

int idx;

xmlChar val;

- idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1);

+ idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0);

if (idx < 0)

goto done;

val = in->cur[idx + 2];

@@ -5866,7 +5991,7 @@ done:

ctxt->sax->endDocument(ctxt->userData);

}

- if ((ctxt->myDoc != NULL) &&

+ if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&

((terminate) || (ctxt->instate == XML_PARSER_EOF) ||

(ctxt->instate == XML_PARSER_EPILOG))) {

xmlDtdPtr dtd;

@@ -5904,8 +6029,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,

}

if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&

(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {

- int base = ctxt->input->base - ctxt->input->buf->buffer->content;

- int cur = ctxt->input->cur - ctxt->input->base;

+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);

+ size_t cur = ctxt->input->cur - ctxt->input->base;

int res;

res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);

@@ -5914,10 +6039,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,

ctxt->disableSAX = 1;

return (XML_PARSER_EOF);

}

- ctxt->input->base = ctxt->input->buf->buffer->content + base;

- ctxt->input->cur = ctxt->input->base + cur;

- ctxt->input->end =

- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];

+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);

#ifdef DEBUG_PUSH

xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);

#endif

@@ -5932,13 +6054,16 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,

if ((in->encoder != NULL) && (in->buffer != NULL) &&

(in->raw != NULL)) {

int nbchars;

+ size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);

+ size_t current = ctxt->input->cur - ctxt->input->base;

- nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);

+ nbchars = xmlCharEncInput(in, terminate);

if (nbchars < 0) {

htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,

"encoder error\n", NULL, NULL);

return(XML_ERR_INVALID_ENCODING);

}

+ xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);

}

@@ -6032,24 +6157,18 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,

inputStream->filename = (char *)

xmlCanonicPath((const xmlChar *) filename);

inputStream->buf = buf;

- inputStream->base = inputStream->buf->buffer->content;

- inputStream->cur = inputStream->buf->buffer->content;

- inputStream->end =

- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];

+ xmlBufResetInput(buf->buffer, inputStream);

inputPush(ctxt, inputStream);

if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&

(ctxt->input->buf != NULL)) {

- int base = ctxt->input->base - ctxt->input->buf->buffer->content;

- int cur = ctxt->input->cur - ctxt->input->base;

+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);

+ size_t cur = ctxt->input->cur - ctxt->input->base;

xmlParserInputBufferPush(ctxt->input->buf, size, chunk);

- ctxt->input->base = ctxt->input->buf->buffer->content + base;

- ctxt->input->cur = ctxt->input->base + cur;

- ctxt->input->end =

- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];

+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);

#ifdef DEBUG_PUSH

xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);

#endif

@@ -6169,12 +6288,16 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding)

/* set encoding */

if (encoding) {

- content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);

- if (content) {

- strcpy ((char *)content, (char *)content_line);

- strcat ((char *)content, (char *)encoding);

- htmlCheckEncoding (ctxt, content);

- xmlFree (content);

+ size_t l = strlen(encoding);

+ if (l < 1000) {

+ content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1);

+ if (content) {

+ strcpy ((char *)content, (char *)content_line);

+ strcat ((char *)content, (char *)encoding);

+ htmlCheckEncoding (ctxt, content);

+ xmlFree (content);

+ }

}

@@ -6451,6 +6574,7 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)

ctxt->wellFormed = 1;

ctxt->nsWellFormed = 1;

+ ctxt->disableSAX = 0;

ctxt->valid = 1;

ctxt->vctxt.userData = ctxt;

ctxt->vctxt.error = xmlParserValidityError;

@@ -6530,6 +6654,18 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)

ctxt->options |= XML_PARSE_HUGE;

options -= XML_PARSE_HUGE;

}

+ if (options & HTML_PARSE_NODEFDTD) {

+ ctxt->options |= HTML_PARSE_NODEFDTD;

+ options -= HTML_PARSE_NODEFDTD;

+ }

+ if (options & HTML_PARSE_IGNORE_ENC) {

+ ctxt->options |= HTML_PARSE_IGNORE_ENC;

+ options -= HTML_PARSE_IGNORE_ENC;

+ }

+ if (options & HTML_PARSE_NOIMPLIED) {

+ ctxt->options |= HTML_PARSE_NOIMPLIED;

+ options -= HTML_PARSE_NOIMPLIED;

+ }

ctxt->dictNames = 0;

return (options);

}

@@ -6676,6 +6812,7 @@ htmlReadFd(int fd, const char *URL, const char *encoding, int options)

if (fd < 0)

return (NULL);

+ xmlInitParser();

xmlInitParser();

input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);

@@ -6723,8 +6860,11 @@ htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,

input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,

XML_CHAR_ENCODING_NONE);

- if (input == NULL)

+ if (input == NULL) {

+ if (ioclose != NULL)

+ ioclose(ioctx);

return (NULL);

+ }

ctxt = htmlNewParserCtxt();

if (ctxt == NULL) {

xmlFreeParserInputBuffer(input);

@@ -6763,6 +6903,7 @@ htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,

return (NULL);

if (ctxt == NULL)

return (NULL);

+ xmlInitParser();

htmlCtxtReset(ctxt);

@@ -6796,6 +6937,7 @@ htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,

return (NULL);

if (ctxt == NULL)

return (NULL);

+ xmlInitParser();

htmlCtxtReset(ctxt);

@@ -6832,6 +6974,7 @@ htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,

return (NULL);

if (buffer == NULL)

return (NULL);

+ xmlInitParser();

htmlCtxtReset(ctxt);

@@ -6874,6 +7017,7 @@ htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,

return (NULL);

if (ctxt == NULL)

return (NULL);

+ xmlInitParser();

htmlCtxtReset(ctxt);

@@ -6918,13 +7062,17 @@ htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,

return (NULL);

if (ctxt == NULL)

return (NULL);

+ xmlInitParser();

htmlCtxtReset(ctxt);

input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,

XML_CHAR_ENCODING_NONE);

- if (input == NULL)

+ if (input == NULL) {

+ if (ioclose != NULL)

+ ioclose(ioctx);

return (NULL);

+ }

stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);

if (stream == NULL) {

xmlFreeParserInputBuffer(input);

« no previous file with comments | « third_party/libxml/src/DOCBparser.c ('k') | third_party/libxml/src/HTMLtree.c » ('j') | no next file with comments »