Index: third_party/libxml/src/HTMLparser.c |
diff --git a/third_party/libxml/src/HTMLparser.c b/third_party/libxml/src/HTMLparser.c |
index 42dc776ae587e03d38416dafe072e563d4040683..d329d3b54076124bf130815f59e5bef13b062d3d 100644 |
--- a/third_party/libxml/src/HTMLparser.c |
+++ b/third_party/libxml/src/HTMLparser.c |
@@ -44,6 +44,9 @@ |
#include <libxml/globals.h> |
#include <libxml/uri.h> |
+#include "buf.h" |
+#include "enc.h" |
+ |
#define HTML_MAX_NAMELEN 1000 |
#define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
#define HTML_PARSER_BUFFER_SIZE 100 |
@@ -727,7 +730,7 @@ static const char* const map_contents[] = { BLOCK, "area", NULL } ; |
static const char* const name_attr[] = { "name", NULL } ; |
static const char* const action_attr[] = { "action", NULL } ; |
static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
-static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ; |
+static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ; |
static const char* const content_attr[] = { "content", NULL } ; |
static const char* const type_attr[] = { "type", NULL } ; |
static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
@@ -1080,9 +1083,9 @@ static const char * const htmlStartClose[] = { |
"menu", "p", "head", "ul", NULL, |
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
"div", "p", "head", NULL, |
-"noscript", "p", "head", NULL, |
+"noscript", "p", NULL, |
"center", "font", "b", "i", "p", "head", NULL, |
-"a", "a", NULL, |
+"a", "a", "head", NULL, |
"caption", "p", NULL, |
"colgroup", "caption", "colgroup", "col", "p", NULL, |
"col", "caption", "col", "p", NULL, |
@@ -1100,6 +1103,43 @@ static const char * const htmlStartClose[] = { |
"option", "option", NULL, |
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
"pre", "listing", "xmp", "a", NULL, |
+/* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */ |
+"tt", "head", NULL, |
+"i", "head", NULL, |
+"b", "head", NULL, |
+"u", "head", NULL, |
+"s", "head", NULL, |
+"strike", "head", NULL, |
+"big", "head", NULL, |
+"small", "head", NULL, |
+ |
+"em", "head", NULL, |
+"strong", "head", NULL, |
+"dfn", "head", NULL, |
+"code", "head", NULL, |
+"samp", "head", NULL, |
+"kbd", "head", NULL, |
+"var", "head", NULL, |
+"cite", "head", NULL, |
+"abbr", "head", NULL, |
+"acronym", "head", NULL, |
+ |
+/* "a" */ |
+"img", "head", NULL, |
+/* "applet" */ |
+/* "embed" */ |
+/* "object" */ |
+"font", "head", NULL, |
+/* "basefont" */ |
+"br", "head", NULL, |
+/* "script" */ |
+"map", "head", NULL, |
+"q", "head", NULL, |
+"sub", "head", NULL, |
+"sup", "head", NULL, |
+"span", "head", NULL, |
+"bdo", "head", NULL, |
+"iframe", "head", NULL, |
NULL |
}; |
@@ -1137,7 +1177,7 @@ static const char *const htmlScriptAttributes[] = { |
"onfocus", |
"onblur", |
"onsubmit", |
- "onrest", |
+ "onreset", |
"onchange", |
"onselect" |
}; |
@@ -2887,9 +2927,11 @@ htmlParseScript(htmlParserCtxtPtr ctxt) { |
} |
if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { |
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
- "Invalid char in CDATA 0x%X\n", cur); |
- NEXT; |
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
+ "Invalid char in CDATA 0x%X\n", cur); |
+ if (ctxt->input->cur < ctxt->input->end) { |
+ NEXT; |
+ } |
} |
if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
@@ -2939,9 +2981,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) { |
*/ |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
- if (ctxt->sax->ignorableWhitespace != NULL) |
- ctxt->sax->ignorableWhitespace(ctxt->userData, |
- buf, nbchar); |
+ if (ctxt->keepBlanks) { |
+ if (ctxt->sax->characters != NULL) |
+ ctxt->sax->characters(ctxt->userData, buf, nbchar); |
+ } else { |
+ if (ctxt->sax->ignorableWhitespace != NULL) |
+ ctxt->sax->ignorableWhitespace(ctxt->userData, |
+ buf, nbchar); |
+ } |
} else { |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
@@ -2972,8 +3019,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) { |
*/ |
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
if (areBlanks(ctxt, buf, nbchar)) { |
- if (ctxt->sax->ignorableWhitespace != NULL) |
- ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); |
+ if (ctxt->keepBlanks) { |
+ if (ctxt->sax->characters != NULL) |
+ ctxt->sax->characters(ctxt->userData, buf, nbchar); |
+ } else { |
+ if (ctxt->sax->ignorableWhitespace != NULL) |
+ ctxt->sax->ignorableWhitespace(ctxt->userData, |
+ buf, nbchar); |
+ } |
} else { |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
@@ -3275,7 +3328,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) { |
val = val * 16 + (CUR - 'A') + 10; |
else { |
htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, |
- "htmlParseCharRef: missing semicolumn\n", |
+ "htmlParseCharRef: missing semicolon\n", |
NULL, NULL); |
break; |
} |
@@ -3290,7 +3343,7 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) { |
val = val * 10 + (CUR - '0'); |
else { |
htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, |
- "htmlParseCharRef: missing semicolumn\n", |
+ "htmlParseCharRef: missing semicolon\n", |
NULL, NULL); |
break; |
} |
@@ -3433,34 +3486,26 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) { |
} |
/** |
- * htmlCheckEncoding: |
+ * htmlCheckEncodingDirect: |
* @ctxt: an HTML parser context |
* @attvalue: the attribute value |
* |
- * Checks an http-equiv attribute from a Meta tag to detect |
+ * Checks an attribute value to detect |
* the encoding |
* If a new encoding is detected the parser is switched to decode |
* it and pass UTF8 |
*/ |
static void |
-htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
- const xmlChar *encoding; |
+htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { |
- if ((ctxt == NULL) || (attvalue == NULL)) |
+ if ((ctxt == NULL) || (encoding == NULL) || |
+ (ctxt->options & HTML_PARSE_IGNORE_ENC)) |
return; |
/* do not change encoding */ |
if (ctxt->input->encoding != NULL) |
return; |
- encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); |
- if (encoding != NULL) { |
- encoding += 8; |
- } else { |
- encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); |
- if (encoding != NULL) |
- encoding += 9; |
- } |
if (encoding != NULL) { |
xmlCharEncoding enc; |
xmlCharEncodingHandlerPtr handler; |
@@ -3498,7 +3543,9 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
xmlSwitchToEncoding(ctxt, handler); |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
} else { |
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; |
+ htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
+ "htmlCheckEncoding: unknown encoding %s\n", |
+ encoding, NULL); |
} |
} |
@@ -3513,24 +3560,51 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
* convert as much as possible to the parser reading buffer. |
*/ |
processed = ctxt->input->cur - ctxt->input->base; |
- xmlBufferShrink(ctxt->input->buf->buffer, processed); |
- nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, |
- ctxt->input->buf->buffer, |
- ctxt->input->buf->raw); |
+ xmlBufShrink(ctxt->input->buf->buffer, processed); |
+ nbchars = xmlCharEncInput(ctxt->input->buf, 1); |
if (nbchars < 0) { |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
"htmlCheckEncoding: encoder error\n", |
NULL, NULL); |
} |
- ctxt->input->base = |
- ctxt->input->cur = ctxt->input->buf->buffer->content; |
- ctxt->input->end = |
- &ctxt->input->base[ctxt->input->buf->buffer->use]; |
+ xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); |
} |
} |
} |
/** |
+ * htmlCheckEncoding: |
+ * @ctxt: an HTML parser context |
+ * @attvalue: the attribute value |
+ * |
+ * Checks an http-equiv attribute from a Meta tag to detect |
+ * the encoding: looks for "charset", optional blanks, then '='. |
+ * The text following the '=' is passed to htmlCheckEncodingDirect(); |
+ * nothing is done when @attvalue is NULL or no such pattern is found. |
+ */ |
+static void |
+htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
+ const xmlChar *encoding; |
+ |
+ if (!attvalue) |
+ return; |
+ |
+ encoding = xmlStrcasestr(attvalue, BAD_CAST"charset"); |
+ if (encoding != NULL) { |
+ encoding += 7; |
+ } |
+ /* |
+ * skip blanks in place so the '=' tested below follows "charset" |
+ */ |
+ while (encoding && IS_BLANK_CH(*encoding)) |
+ encoding ++; |
+ if (encoding && *encoding == '=') { |
+ encoding ++; |
+ htmlCheckEncodingDirect(ctxt, encoding); |
+ } |
+} |
+ |
+/** |
* htmlCheckMeta: |
* @ctxt: an HTML parser context |
* @atts: the attributes values |
@@ -3554,6 +3628,8 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { |
if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
http = 1; |
+ else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset"))) |
+ htmlCheckEncodingDirect(ctxt, value); |
else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
content = value; |
att = atts[i++]; |
@@ -3595,13 +3671,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { |
int i; |
int discardtag = 0; |
- if (ctxt->instate == XML_PARSER_EOF) |
- return(-1); |
if ((ctxt == NULL) || (ctxt->input == NULL)) { |
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
"htmlParseStartTag: context error\n", NULL, NULL); |
return -1; |
} |
+ if (ctxt->instate == XML_PARSER_EOF) |
+ return(-1); |
if (CUR != '<') return -1; |
NEXT; |
@@ -3883,6 +3959,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) |
if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
ctxt->sax->endElement(ctxt->userData, name); |
+ htmlNodeInfoPop(ctxt); |
htmlnamePop(ctxt); |
ret = 1; |
} else { |
@@ -4289,7 +4366,7 @@ static void |
htmlParseElementInternal(htmlParserCtxtPtr ctxt) { |
const xmlChar *name; |
const htmlElemDesc * info; |
- htmlParserNodeInfo node_info; |
+ htmlParserNodeInfo node_info = { 0, }; |
int failed; |
if ((ctxt == NULL) || (ctxt->input == NULL)) { |
@@ -4670,7 +4747,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { |
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
ctxt->sax->endDocument(ctxt->userData); |
- if (ctxt->myDoc != NULL) { |
+ if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) { |
dtd = xmlGetIntSubset(ctxt->myDoc); |
if (dtd == NULL) |
ctxt->myDoc->intSubset = |
@@ -4875,9 +4952,7 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) { |
input->filename = NULL; |
input->buf = buf; |
- input->base = input->buf->buffer->content; |
- input->cur = input->buf->buffer->content; |
- input->end = &input->buf->buffer->content[input->buf->buffer->use]; |
+ xmlBufResetInput(buf->buffer, input); |
inputPush(ctxt, input); |
return(ctxt); |
@@ -4994,8 +5069,8 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, |
buf = in->base; |
len = in->length; |
} else { |
- buf = in->buf->buffer->content; |
- len = in->buf->buffer->use; |
+ buf = xmlBufContent(in->buf->buffer); |
+ len = xmlBufUse(in->buf->buffer); |
} |
/* take into account the sequence length */ |
@@ -5087,13 +5162,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, |
* @stop: Array of chars, which stop the lookup. |
* @stopLen: Length of stop-Array |
* |
- * Try to find if any char of the stop-Array is available in the input |
+ * Try to find if any char of the stop-Array is available in the input |
* stream. |
* This function has a side effect of (possibly) incrementing ctxt->checkIndex |
* to avoid rescanning sequences of bytes, it DOES change the state of the |
* parser, do not use liberally. |
* |
- * Returns the index to the current parsing point if a stopChar |
+ * Returns the index to the current parsing point if a stopChar |
* is available, -1 otherwise. |
*/ |
static int |
@@ -5121,8 +5196,8 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, |
buf = in->base; |
len = in->length; |
} else { |
- buf = in->buf->buffer->content; |
- len = in->buf->buffer->use; |
+ buf = xmlBufContent(in->buf->buffer); |
+ len = xmlBufUse(in->buf->buffer); |
} |
for (; base < len; base++) { |
@@ -5171,6 +5246,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
int avail = 0; |
xmlChar cur, next; |
+ htmlParserNodeInfo node_info; |
+ |
#ifdef DEBUG_PUSH |
switch (ctxt->instate) { |
case XML_PARSER_EOF: |
@@ -5231,7 +5308,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
else |
- avail = in->buf->buffer->use - (in->cur - in->base); |
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if ((avail == 0) && (terminate)) { |
htmlAutoCloseOnEnd(ctxt); |
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
@@ -5267,7 +5344,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
else |
- avail = in->buf->buffer->use - (in->cur - in->base); |
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
} |
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
ctxt->sax->setDocumentLocator(ctxt->userData, |
@@ -5309,11 +5386,24 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
else |
- avail = in->buf->buffer->use - (in->cur - in->base); |
- if (avail < 2) |
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
+ /* |
+ * no chars in buffer |
+ */ |
+ if (avail < 1) |
goto done; |
+ /* |
+ * not enough chars in buffer |
+ */ |
+ if (avail < 2) { |
+ if (!terminate) |
+ goto done; |
+ else |
+ next = ' '; |
+ } else { |
+ next = in->cur[1]; |
+ } |
cur = in->cur[0]; |
- next = in->cur[1]; |
if ((cur == '<') && (next == '!') && |
(in->cur[2] == '-') && (in->cur[3] == '-')) { |
if ((!terminate) && |
@@ -5369,7 +5459,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
else |
- avail = in->buf->buffer->use - (in->cur - in->base); |
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if (avail < 2) |
goto done; |
cur = in->cur[0]; |
@@ -5410,7 +5500,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
if (in->buf == NULL) |
avail = in->length - (in->cur - in->base); |
else |
- avail = in->buf->buffer->use - (in->cur - in->base); |
+ avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
if (avail < 1) |
goto done; |
cur = in->cur[0]; |
@@ -5463,8 +5553,22 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
int failed; |
const htmlElemDesc * info; |
- if (avail < 2) |
+ /* |
+ * no chars in buffer |
+ */ |
+ if (avail < 1) |
goto done; |
+ /* |
+ * not enough chars in buffer |
+ */ |
+ if (avail < 2) { |
+ if (!terminate) |
+ goto done; |
+ else |
+ next = ' '; |
+ } else { |
+ next = in->cur[1]; |
+ } |
cur = in->cur[0]; |
if (cur != '<') { |
ctxt->instate = XML_PARSER_CONTENT; |
@@ -5474,7 +5578,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
#endif |
break; |
} |
- if (in->cur[1] == '/') { |
+ if (next == '/') { |
ctxt->instate = XML_PARSER_END_TAG; |
ctxt->checkIndex = 0; |
#ifdef DEBUG_PUSH |
@@ -5487,6 +5591,14 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
goto done; |
+ /* Capture start position */ |
+ if (ctxt->record_info) { |
+ node_info.begin_pos = ctxt->input->consumed + |
+ (CUR_PTR - ctxt->input->base); |
+ node_info.begin_line = ctxt->input->line; |
+ } |
+ |
+ |
failed = htmlParseStartTag(ctxt); |
name = ctxt->name; |
if ((failed == -1) || |
@@ -5536,6 +5648,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
htmlnamePop(ctxt); |
} |
+ if (ctxt->record_info) |
+ htmlNodeInfoPush(ctxt, &node_info); |
+ |
ctxt->instate = XML_PARSER_CONTENT; |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, |
@@ -5552,6 +5667,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
ctxt->sax->endElement(ctxt->userData, name); |
htmlnamePop(ctxt); |
} |
+ |
+ if (ctxt->record_info) |
+ htmlNodeInfoPush(ctxt, &node_info); |
+ |
ctxt->instate = XML_PARSER_CONTENT; |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, |
@@ -5579,9 +5698,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
if ((cur != '<') && (cur != '&')) { |
if (ctxt->sax != NULL) { |
if (IS_BLANK_CH(cur)) { |
- if (ctxt->sax->ignorableWhitespace != NULL) |
- ctxt->sax->ignorableWhitespace( |
- ctxt->userData, &cur, 1); |
+ if (ctxt->keepBlanks) { |
+ if (ctxt->sax->characters != NULL) |
+ ctxt->sax->characters( |
+ ctxt->userData, &cur, 1); |
+ } else { |
+ if (ctxt->sax->ignorableWhitespace != NULL) |
+ ctxt->sax->ignorableWhitespace( |
+ ctxt->userData, &cur, 1); |
+ } |
} else { |
htmlCheckParagraph(ctxt); |
if (ctxt->sax->characters != NULL) |
@@ -5609,7 +5734,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
int idx; |
xmlChar val; |
- idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); |
+ idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0); |
if (idx < 0) |
goto done; |
val = in->cur[idx + 2]; |
@@ -5866,7 +5991,7 @@ done: |
ctxt->sax->endDocument(ctxt->userData); |
} |
} |
- if ((ctxt->myDoc != NULL) && |
+ if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) && |
((terminate) || (ctxt->instate == XML_PARSER_EOF) || |
(ctxt->instate == XML_PARSER_EPILOG))) { |
xmlDtdPtr dtd; |
@@ -5904,8 +6029,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, |
} |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
- int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
- int cur = ctxt->input->cur - ctxt->input->base; |
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
+ size_t cur = ctxt->input->cur - ctxt->input->base; |
int res; |
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
@@ -5914,10 +6039,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, |
ctxt->disableSAX = 1; |
return (XML_PARSER_EOF); |
} |
- ctxt->input->base = ctxt->input->buf->buffer->content + base; |
- ctxt->input->cur = ctxt->input->base + cur; |
- ctxt->input->end = |
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; |
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
#endif |
@@ -5932,13 +6054,16 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, |
if ((in->encoder != NULL) && (in->buffer != NULL) && |
(in->raw != NULL)) { |
int nbchars; |
+ size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); |
+ size_t current = ctxt->input->cur - ctxt->input->base; |
- nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); |
+ nbchars = xmlCharEncInput(in, terminate); |
if (nbchars < 0) { |
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
"encoder error\n", NULL, NULL); |
return(XML_ERR_INVALID_ENCODING); |
} |
+ xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); |
} |
} |
} |
@@ -6032,24 +6157,18 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, |
inputStream->filename = (char *) |
xmlCanonicPath((const xmlChar *) filename); |
inputStream->buf = buf; |
- inputStream->base = inputStream->buf->buffer->content; |
- inputStream->cur = inputStream->buf->buffer->content; |
- inputStream->end = |
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; |
+ xmlBufResetInput(buf->buffer, inputStream); |
inputPush(ctxt, inputStream); |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL)) { |
- int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
- int cur = ctxt->input->cur - ctxt->input->base; |
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
+ size_t cur = ctxt->input->cur - ctxt->input->base; |
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
- ctxt->input->base = ctxt->input->buf->buffer->content + base; |
- ctxt->input->cur = ctxt->input->base + cur; |
- ctxt->input->end = |
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; |
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
#ifdef DEBUG_PUSH |
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
#endif |
@@ -6169,12 +6288,16 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) |
/* set encoding */ |
if (encoding) { |
- content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1); |
- if (content) { |
- strcpy ((char *)content, (char *)content_line); |
- strcat ((char *)content, (char *)encoding); |
- htmlCheckEncoding (ctxt, content); |
- xmlFree (content); |
+ size_t l = strlen(encoding); |
+ |
+ if (l < 1000) { |
+ content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1); |
+ if (content) { |
+ strcpy ((char *)content, (char *)content_line); |
+ strcat ((char *)content, (char *)encoding); |
+ htmlCheckEncoding (ctxt, content); |
+ xmlFree (content); |
+ } |
} |
} |
@@ -6451,6 +6574,7 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt) |
ctxt->wellFormed = 1; |
ctxt->nsWellFormed = 1; |
+ ctxt->disableSAX = 0; |
ctxt->valid = 1; |
ctxt->vctxt.userData = ctxt; |
ctxt->vctxt.error = xmlParserValidityError; |
@@ -6530,6 +6654,18 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) |
ctxt->options |= XML_PARSE_HUGE; |
options -= XML_PARSE_HUGE; |
} |
+ if (options & HTML_PARSE_NODEFDTD) { |
+ ctxt->options |= HTML_PARSE_NODEFDTD; |
+ options -= HTML_PARSE_NODEFDTD; |
+ } |
+ if (options & HTML_PARSE_IGNORE_ENC) { |
+ ctxt->options |= HTML_PARSE_IGNORE_ENC; |
+ options -= HTML_PARSE_IGNORE_ENC; |
+ } |
+ if (options & HTML_PARSE_NOIMPLIED) { |
+ ctxt->options |= HTML_PARSE_NOIMPLIED; |
+ options -= HTML_PARSE_NOIMPLIED; |
+ } |
ctxt->dictNames = 0; |
return (options); |
} |
@@ -6676,6 +6812,7 @@ htmlReadFd(int fd, const char *URL, const char *encoding, int options) |
if (fd < 0) |
return (NULL); |
+ xmlInitParser(); |
xmlInitParser(); |
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); |
@@ -6723,8 +6860,11 @@ htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
XML_CHAR_ENCODING_NONE); |
- if (input == NULL) |
+ if (input == NULL) { |
+ if (ioclose != NULL) |
+ ioclose(ioctx); |
return (NULL); |
+ } |
ctxt = htmlNewParserCtxt(); |
if (ctxt == NULL) { |
xmlFreeParserInputBuffer(input); |
@@ -6763,6 +6903,7 @@ htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, |
return (NULL); |
if (ctxt == NULL) |
return (NULL); |
+ xmlInitParser(); |
htmlCtxtReset(ctxt); |
@@ -6796,6 +6937,7 @@ htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, |
return (NULL); |
if (ctxt == NULL) |
return (NULL); |
+ xmlInitParser(); |
htmlCtxtReset(ctxt); |
@@ -6832,6 +6974,7 @@ htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, |
return (NULL); |
if (buffer == NULL) |
return (NULL); |
+ xmlInitParser(); |
htmlCtxtReset(ctxt); |
@@ -6874,6 +7017,7 @@ htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, |
return (NULL); |
if (ctxt == NULL) |
return (NULL); |
+ xmlInitParser(); |
htmlCtxtReset(ctxt); |
@@ -6918,13 +7062,17 @@ htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, |
return (NULL); |
if (ctxt == NULL) |
return (NULL); |
+ xmlInitParser(); |
htmlCtxtReset(ctxt); |
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
XML_CHAR_ENCODING_NONE); |
- if (input == NULL) |
+ if (input == NULL) { |
+ if (ioclose != NULL) |
+ ioclose(ioctx); |
return (NULL); |
+ } |
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
if (stream == NULL) { |
xmlFreeParserInputBuffer(input); |