Index: third_party/libxml/parser.c |
diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c |
index 5948f91f2cf7f2db87782e996f3b8fd6deb92d12..4a3cde523e0872e2f907e2b76e9b4ff3cdc4cc9d 100644 |
--- a/third_party/libxml/parser.c |
+++ b/third_party/libxml/parser.c |
@@ -80,20 +80,115 @@ |
#include <zlib.h> |
#endif |
+static void |
+xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); |
+ |
+static xmlParserCtxtPtr |
+xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, |
+ const xmlChar *base, xmlParserCtxtPtr pctx); |
+ |
+/************************************************************************ |
+ * * |
+ * Arbitrary limits set in the parser. See XML_PARSE_HUGE * |
+ * * |
+ ************************************************************************/ |
+ |
+#define XML_PARSER_BIG_ENTITY 1000 |
+#define XML_PARSER_LOT_ENTITY 5000 |
+ |
+/* |
+ * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity |
+ * replacement over the size in bytes of the input indicates that you have |
+ * an exponential behaviour. A value of 10 corresponds to at least 3 entity |
+ * replacements per byte of input. |
+ */ |
+#define XML_PARSER_NON_LINEAR 10 |
+ |
+/* |
+ * xmlParserEntityCheck |
+ * |
+ * Check for non-linear entity expansion: returns 1 and raises |
+ * XML_ERR_ENTITY_LOOP when the expansion looks excessive, 0 otherwise. |
+ * This is not a limitation of the parser but a safety |
+ * boundary feature. It can be disabled with the XML_PARSE_HUGE |
+ * parser option. |
+ */ |
+static int |
+xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, |
+ xmlEntityPtr ent) |
+{ |
+ unsigned long consumed = 0; |
+ |
+ if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) |
+ return (0); |
+ if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) |
+ return (1); |
+ if (size != 0) { |
+ /* |
+ * Check on the replacement size; below XML_PARSER_BIG_ENTITY it passes |
+ */ |
+ if (size < XML_PARSER_BIG_ENTITY) |
+ return(0); |
+ |
+ /* |
+ * Baseline: input consumed so far plus ctxt->sizeentities |
+ */ |
+ if (ctxt->input != NULL) { |
+ consumed = ctxt->input->consumed + |
+ (ctxt->input->cur - ctxt->input->base); |
+ } |
+ consumed += ctxt->sizeentities; |
+ |
+ if ((size < XML_PARSER_NON_LINEAR * consumed) && |
+ (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) |
+ return (0); |
+ } else if (ent != NULL) { |
+ /* |
+ * No size given: use the number of parsed entities in ent->checked |
+ */ |
+ size = ent->checked; |
+ |
+ /* |
+ * The amount of data parsed, counting entities size only once |
+ */ |
+ if (ctxt->input != NULL) { |
+ consumed = ctxt->input->consumed + |
+ (ctxt->input->cur - ctxt->input->base); |
+ } |
+ consumed += ctxt->sizeentities; |
+ |
+ /* |
+ * Check the density of entities for the amount of data, |
+ * knowing an entity reference will take at least 3 bytes |
+ */ |
+ if (size * 3 < consumed * XML_PARSER_NON_LINEAR) |
+ return (0); |
+ } else { |
+ /* |
+ * Neither a size nor an entity was given: nothing to check |
+ */ |
+ return (0); |
+ } |
+ |
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
+ return (1); |
+} |
+ |
/** |
* xmlParserMaxDepth: |
* |
- * arbitrary depth limit for the XML documents that we allow to |
- * process. This is not a limitation of the parser but a safety |
- * boundary feature. |
+ * arbitrary depth limit for the XML documents that we allow to |
+ * process. This is not a limitation of the parser but a safety |
+ * boundary feature. It can be disabled with the XML_PARSE_HUGE |
+ * parser option. |
*/ |
-unsigned int xmlParserMaxDepth = 1024; |
+unsigned int xmlParserMaxDepth = 256; |
+ |
-#define SAX2 1 |
+#define SAX2 1 |
#define XML_PARSER_BIG_BUFFER_SIZE 300 |
#define XML_PARSER_BUFFER_SIZE 100 |
- |
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" |
/* |
@@ -107,8 +202,8 @@ static const char *xmlW3CPIs[] = { |
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ |
-xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, |
- const xmlChar **str); |
+static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, |
+ const xmlChar **str); |
static xmlParserErrors |
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
@@ -116,6 +211,9 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
void *user_data, int depth, const xmlChar *URL, |
const xmlChar *ID, xmlNodePtr *list); |
+static int |
+xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, |
+ const char *encoding); |
#ifdef LIBXML_LEGACY_ENABLED |
static void |
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, |
@@ -152,14 +250,15 @@ xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, |
return; |
if (ctxt != NULL) |
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; |
+ |
if (prefix == NULL) |
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
- ctxt->errNo, XML_ERR_FATAL, NULL, 0, |
+ XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, |
(const char *) localname, NULL, NULL, 0, 0, |
"Attribute %s redefined\n", localname); |
else |
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
- ctxt->errNo, XML_ERR_FATAL, NULL, 0, |
+ XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, |
(const char *) prefix, (const char *) localname, |
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, |
localname); |
@@ -395,7 +494,7 @@ xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
if (ctxt != NULL) |
ctxt->errNo = error; |
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, |
- XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); |
+ XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); |
if (ctxt != NULL) { |
ctxt->wellFormed = 0; |
if (ctxt->recovery == 0) |
@@ -418,20 +517,28 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
const char *msg, const xmlChar *str1, const xmlChar *str2) |
{ |
xmlStructuredErrorFunc schannel = NULL; |
- |
+ |
if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
(ctxt->instate == XML_PARSER_EOF)) |
return; |
if ((ctxt != NULL) && (ctxt->sax != NULL) && |
(ctxt->sax->initialized == XML_SAX2_MAGIC)) |
schannel = ctxt->sax->serror; |
- __xmlRaiseError(schannel, |
+ if (ctxt != NULL) { |
+ __xmlRaiseError(schannel, |
(ctxt->sax) ? ctxt->sax->warning : NULL, |
ctxt->userData, |
ctxt, NULL, XML_FROM_PARSER, error, |
XML_ERR_WARNING, NULL, 0, |
(const char *) str1, (const char *) str2, NULL, 0, 0, |
msg, (const char *) str1, (const char *) str2); |
+ } else { |
+ __xmlRaiseError(schannel, NULL, NULL, |
+ ctxt, NULL, XML_FROM_PARSER, error, |
+ XML_ERR_WARNING, NULL, 0, |
+ (const char *) str1, (const char *) str2, NULL, 0, 0, |
+ msg, (const char *) str1, (const char *) str2); |
+ } |
} |
/** |
@@ -445,7 +552,7 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
*/ |
static void |
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
- const char *msg, const xmlChar *str1) |
+ const char *msg, const xmlChar *str1, const xmlChar *str2) |
{ |
xmlStructuredErrorFunc schannel = NULL; |
@@ -457,14 +564,20 @@ xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) |
schannel = ctxt->sax->serror; |
} |
- __xmlRaiseError(schannel, |
+ if (ctxt != NULL) { |
+ __xmlRaiseError(schannel, |
ctxt->vctxt.error, ctxt->vctxt.userData, |
ctxt, NULL, XML_FROM_DTD, error, |
XML_ERR_ERROR, NULL, 0, (const char *) str1, |
- NULL, NULL, 0, 0, |
- msg, (const char *) str1); |
- if (ctxt != NULL) { |
+ (const char *) str2, NULL, 0, 0, |
+ msg, (const char *) str1, (const char *) str2); |
ctxt->valid = 0; |
+ } else { |
+ __xmlRaiseError(schannel, NULL, NULL, |
+ ctxt, NULL, XML_FROM_DTD, error, |
+ XML_ERR_ERROR, NULL, 0, (const char *) str1, |
+ (const char *) str2, NULL, 0, 0, |
+ msg, (const char *) str1, (const char *) str2); |
} |
} |
@@ -610,6 +723,31 @@ xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
ctxt->nsWellFormed = 0; |
} |
+/** |
+ * xmlNsWarn |
+ * @ctxt: an XML parser context |
+ * @error: the error number |
+ * @msg: the message |
+ * @info1: extra information string |
+ * @info2: extra information string |
+ * |
+ * Raise a namespace warning; @info3 is a third extra information string |
+ */ |
+static void |
+xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
+ const char *msg, |
+ const xmlChar * info1, const xmlChar * info2, |
+ const xmlChar * info3) |
+{ |
+ if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
+ (ctxt->instate == XML_PARSER_EOF)) |
+ return; |
+ __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, |
+ XML_ERR_WARNING, NULL, 0, (const char *) info1, |
+ (const char *) info2, (const char *) info3, 0, 0, msg, |
+ info1, info2, info3); |
+} |
+ |
/************************************************************************ |
* * |
* Library wide options * |
@@ -809,7 +947,7 @@ xmlHasFeature(xmlFeature feature) |
return(1); |
#else |
return(0); |
-#endif |
+#endif |
case XML_WITH_ZLIB: |
#ifdef LIBXML_ZLIB_ENABLED |
return(1); |
@@ -865,7 +1003,7 @@ typedef xmlDefAttrs *xmlDefAttrsPtr; |
struct _xmlDefAttrs { |
int nbAttrs; /* number of defaulted attributes on that element */ |
int maxAttrs; /* the size of the array */ |
- const xmlChar *values[4]; /* array of localname/prefix/values */ |
+ const xmlChar *values[5]; /* array of localname/prefix/values/external */ |
}; |
/** |
@@ -919,7 +1057,7 @@ xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) |
* is needed. |
*/ |
static const xmlChar * |
-xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) |
+xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) |
{ |
int i; |
int remove_head = 0; |
@@ -960,7 +1098,8 @@ xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) |
return(ret); |
} else if (remove_head) { |
*len -= remove_head; |
- return(src + remove_head); |
+ memmove(src, src + remove_head, 1 + *len); |
+ return(src); |
} |
return(NULL); |
} |
@@ -1017,7 +1156,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, |
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); |
if (defaults == NULL) { |
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + |
- (4 * 4) * sizeof(const xmlChar *)); |
+ (4 * 5) * sizeof(const xmlChar *)); |
if (defaults == NULL) |
goto mem_error; |
defaults->nbAttrs = 0; |
@@ -1031,7 +1170,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, |
xmlDefAttrsPtr temp; |
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + |
- (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); |
+ (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); |
if (temp == NULL) |
goto mem_error; |
defaults = temp; |
@@ -1056,13 +1195,17 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, |
prefix = xmlDictLookup(ctxt->dict, fullattr, len); |
} |
- defaults->values[4 * defaults->nbAttrs] = name; |
- defaults->values[4 * defaults->nbAttrs + 1] = prefix; |
+ defaults->values[5 * defaults->nbAttrs] = name; |
+ defaults->values[5 * defaults->nbAttrs + 1] = prefix; |
/* intern the string and precompute the end */ |
len = xmlStrlen(value); |
value = xmlDictLookup(ctxt->dict, value, len); |
- defaults->values[4 * defaults->nbAttrs + 2] = value; |
- defaults->values[4 * defaults->nbAttrs + 3] = value + len; |
+ defaults->values[5 * defaults->nbAttrs + 2] = value; |
+ defaults->values[5 * defaults->nbAttrs + 3] = value + len; |
+ if (ctxt->external) |
+ defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; |
+ else |
+ defaults->values[5 * defaults->nbAttrs + 4] = NULL; |
defaults->nbAttrs++; |
return; |
@@ -1218,12 +1361,12 @@ xmlCheckLanguageID(const xmlChar * lang) |
/************************************************************************ |
* * |
- * Parser stacks related functions and macros * |
+ * Parser stacks related functions and macros * |
* * |
************************************************************************/ |
-xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, |
- const xmlChar ** str); |
+static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, |
+ const xmlChar ** str); |
#ifdef SAX2 |
/** |
@@ -1299,7 +1442,7 @@ nsPop(xmlParserCtxtPtr ctxt, int nr) |
} |
if (ctxt->nsNr <= 0) |
return (0); |
- |
+ |
for (i = 0;i < nr;i++) { |
ctxt->nsNr--; |
ctxt->nsTab[ctxt->nsNr] = NULL; |
@@ -1349,13 +1492,13 @@ mem_error: |
* |
* Pushes a new parser input on top of the input stack |
* |
- * Returns 0 in case of error, the index in the stack otherwise |
+ * Returns -1 in case of error, the index in the stack otherwise |
*/ |
int |
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) |
{ |
if ((ctxt == NULL) || (value == NULL)) |
- return(0); |
+ return(-1); |
if (ctxt->inputNr >= ctxt->inputMax) { |
ctxt->inputMax *= 2; |
ctxt->inputTab = |
@@ -1364,7 +1507,10 @@ inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) |
sizeof(ctxt->inputTab[0])); |
if (ctxt->inputTab == NULL) { |
xmlErrMemory(ctxt, NULL); |
- return (0); |
+ xmlFreeInputStream(value); |
+ ctxt->inputMax /= 2; |
+ value = NULL; |
+ return (-1); |
} |
} |
ctxt->inputTab[ctxt->inputNr] = value; |
@@ -1404,7 +1550,7 @@ inputPop(xmlParserCtxtPtr ctxt) |
* |
* Pushes a new element node on top of the node stack |
* |
- * Returns 0 in case of error, the index in the stack otherwise |
+ * Returns -1 in case of error, the index in the stack otherwise |
*/ |
int |
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) |
@@ -1418,22 +1564,24 @@ nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) |
sizeof(ctxt->nodeTab[0])); |
if (tmp == NULL) { |
xmlErrMemory(ctxt, NULL); |
- return (0); |
+ return (-1); |
} |
ctxt->nodeTab = tmp; |
ctxt->nodeMax *= 2; |
} |
- if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { |
+ if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && |
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) { |
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, |
- "Excessive depth in document: change xmlParserMaxDepth = %d\n", |
+ "Excessive depth in document: %d use XML_PARSE_HUGE option\n", |
xmlParserMaxDepth); |
ctxt->instate = XML_PARSER_EOF; |
- return(0); |
+ return(-1); |
} |
ctxt->nodeTab[ctxt->nodeNr] = value; |
ctxt->node = value; |
return (ctxt->nodeNr++); |
} |
+ |
/** |
* nodePop: |
* @ctxt: an XML parser context |
@@ -1600,7 +1748,8 @@ static int spacePush(xmlParserCtxtPtr ctxt, int val) { |
ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); |
if (tmp == NULL) { |
xmlErrMemory(ctxt, NULL); |
- return(0); |
+ ctxt->spaceMax /=2; |
+ return(-1); |
} |
ctxt->spaceTab = tmp; |
} |
@@ -1721,7 +1870,7 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) { |
static void xmlGROW (xmlParserCtxtPtr ctxt) { |
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
- if ((*ctxt->input->cur == 0) && |
+ if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && |
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
xmlPopInput(ctxt); |
} |
@@ -1848,10 +1997,12 @@ xmlPopInput(xmlParserCtxtPtr ctxt) { |
* |
* xmlPushInput: switch to a new input stream which is stacked on top |
* of the previous one(s). |
+ * Returns -1 in case of error or the index in the input stack |
*/ |
-void |
+int |
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { |
- if (input == NULL) return; |
+ int ret; |
+ if (input == NULL) return(-1); |
if (xmlParserDebugEntities) { |
if ((ctxt->input != NULL) && (ctxt->input->filename)) |
@@ -1861,8 +2012,9 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { |
xmlGenericError(xmlGenericErrorContext, |
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); |
} |
- inputPush(ctxt, input); |
+ ret = inputPush(ctxt, input); |
GROW; |
+ return(ret); |
} |
/** |
@@ -2240,7 +2392,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { |
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, |
"PEReference: %%%s; not found\n", |
- name); |
+ name, NULL); |
} else |
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
"PEReference: %%%s; not found\n", |
@@ -2249,7 +2401,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
} |
} else if (ctxt->input->free != deallocblankswrapper) { |
input = xmlNewBlanksWrapperInputStream(ctxt, entity); |
- xmlPushInput(ctxt, input); |
+ if (xmlPushInput(ctxt, input) < 0) |
+ return; |
} else { |
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || |
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { |
@@ -2262,7 +2415,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
* this is done independently. |
*/ |
input = xmlNewEntityInputStream(ctxt, entity); |
- xmlPushInput(ctxt, input); |
+ if (xmlPushInput(ctxt, input) < 0) |
+ return; |
/* |
* Get the 4 first bytes and decode the charset |
@@ -2305,10 +2459,10 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
/* |
* Macro used to grow the current buffer. |
*/ |
-#define growBuffer(buffer) { \ |
+#define growBuffer(buffer, n) { \ |
xmlChar *tmp; \ |
- buffer##_size += XML_PARSER_BUFFER_SIZE ; \ |
buffer##_size *= 2; \ |
+ buffer##_size += n; \ |
tmp = (xmlChar *) \ |
xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ |
if (tmp == NULL) goto mem_error; \ |
@@ -2351,7 +2505,9 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
return(NULL); |
last = str + len; |
- if (ctxt->depth > 40) { |
+ if (((ctxt->depth > 40) && |
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) || |
+ (ctxt->depth > 1024)) { |
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
return(NULL); |
} |
@@ -2381,7 +2537,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
COPY_BUF(0,buffer,nbchars,val); |
} |
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { |
- growBuffer(buffer); |
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
} |
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { |
if (xmlParserDebugEntities) |
@@ -2389,12 +2545,17 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
"String decoding Entity Reference: %.30s\n", |
str); |
ent = xmlParseStringEntityRef(ctxt, &str); |
+ if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || |
+ (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) |
+ goto int_error; |
+ if (ent != NULL) |
+ ctxt->nbentities += ent->checked; |
if ((ent != NULL) && |
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
if (ent->content != NULL) { |
COPY_BUF(0,buffer,nbchars,ent->content[0]); |
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { |
- growBuffer(buffer); |
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
} |
} else { |
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, |
@@ -2405,13 +2566,16 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
0, 0, 0); |
ctxt->depth--; |
+ |
if (rep != NULL) { |
current = rep; |
while (*current != 0) { /* non input consuming loop */ |
buffer[nbchars++] = *current++; |
if (nbchars > |
buffer_size - XML_PARSER_BUFFER_SIZE) { |
- growBuffer(buffer); |
+ if (xmlParserEntityCheck(ctxt, nbchars, ent)) |
+ goto int_error; |
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
} |
} |
xmlFree(rep); |
@@ -2423,7 +2587,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
buffer[nbchars++] = '&'; |
if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { |
- growBuffer(buffer); |
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
} |
for (;i > 0;i--) |
buffer[nbchars++] = *cur++; |
@@ -2434,10 +2598,13 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
xmlGenericError(xmlGenericErrorContext, |
"String decoding PE Reference: %.30s\n", str); |
ent = xmlParseStringPEReference(ctxt, &str); |
+ if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) |
+ goto int_error; |
+ if (ent != NULL) |
+ ctxt->nbentities += ent->checked; |
if (ent != NULL) { |
if (ent->content == NULL) { |
- if (xmlLoadEntityContent(ctxt, ent) < 0) { |
- } |
+ xmlLoadEntityContent(ctxt, ent); |
} |
ctxt->depth++; |
rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
@@ -2449,7 +2616,9 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
buffer[nbchars++] = *current++; |
if (nbchars > |
buffer_size - XML_PARSER_BUFFER_SIZE) { |
- growBuffer(buffer); |
+ if (xmlParserEntityCheck(ctxt, nbchars, ent)) |
+ goto int_error; |
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
} |
} |
xmlFree(rep); |
@@ -2460,7 +2629,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
COPY_BUF(l,buffer,nbchars,c); |
str += l; |
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { |
- growBuffer(buffer); |
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
} |
} |
if (str < last) |
@@ -2468,11 +2637,12 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
else |
c = 0; |
} |
- buffer[nbchars++] = 0; |
+ buffer[nbchars] = 0; |
return(buffer); |
mem_error: |
xmlErrMemory(ctxt, NULL); |
+int_error: |
if (rep != NULL) |
xmlFree(rep); |
if (buffer != NULL) |
@@ -2765,10 +2935,203 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { |
* * |
************************************************************************/ |
-static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); |
+/************************************************************************ |
+ * * |
+ * Routines to parse Name, NCName and NmToken * |
+ * * |
+ ************************************************************************/ |
+#ifdef DEBUG |
+static unsigned long nbParseName = 0; |
+static unsigned long nbParseNmToken = 0; |
+static unsigned long nbParseNCName = 0; |
+static unsigned long nbParseNCNameComplex = 0; |
+static unsigned long nbParseNameComplex = 0; |
+static unsigned long nbParseStringName = 0; |
+#endif |
+ |
+/* |
+ * The two following functions are related to the change of accepted |
+ * characters for Name and NmToken in the Revision 5 of XML-1.0 |
+ * They correspond to the modified production [4] and the new production [4a] |
+ * changes in that revision. Also note that the macros used for the |
+ * productions Letter, Digit, CombiningChar and Extender are not needed |
+ * anymore. |
+ * We still keep compatibility with pre-revision-5 parsing semantics if the |
+ * new XML_PARSE_OLD10 option is given to the parser. |
+ */ |
+static int |
+xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { |
+ if ((ctxt->options & XML_PARSE_OLD10) == 0) { |
+ /* |
+ * Name start test using productions [4] [4a] and [5] of the |
+ * Update 5 of XML-1.0; the function returns 1 on a match, else 0 |
+ */ |
+ if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ |
+ (((c >= 'a') && (c <= 'z')) || |
+ ((c >= 'A') && (c <= 'Z')) || |
+ (c == '_') || (c == ':') || |
+ ((c >= 0xC0) && (c <= 0xD6)) || |
+ ((c >= 0xD8) && (c <= 0xF6)) || |
+ ((c >= 0xF8) && (c <= 0x2FF)) || |
+ ((c >= 0x370) && (c <= 0x37D)) || |
+ ((c >= 0x37F) && (c <= 0x1FFF)) || |
+ ((c >= 0x200C) && (c <= 0x200D)) || |
+ ((c >= 0x2070) && (c <= 0x218F)) || |
+ ((c >= 0x2C00) && (c <= 0x2FEF)) || |
+ ((c >= 0x3001) && (c <= 0xD7FF)) || |
+ ((c >= 0xF900) && (c <= 0xFDCF)) || |
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
+ ((c >= 0x10000) && (c <= 0xEFFFF)))) |
+ return(1); |
+ } else { |
+ if (IS_LETTER(c) || (c == '_') || (c == ':')) |
+ return(1); |
+ } |
+ return(0); |
+} |
+ |
+static int |
+xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { |
+ if ((ctxt->options & XML_PARSE_OLD10) == 0) { |
+ /* |
+ * Name character test using productions [4] [4a] and [5] of the |
+ * Update 5 of XML-1.0; the function returns 1 on a match, else 0 |
+ */ |
+ if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ |
+ (((c >= 'a') && (c <= 'z')) || |
+ ((c >= 'A') && (c <= 'Z')) || |
+ ((c >= '0') && (c <= '9')) || /* !start */ |
+ (c == '_') || (c == ':') || |
+ (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ |
+ ((c >= 0xC0) && (c <= 0xD6)) || |
+ ((c >= 0xD8) && (c <= 0xF6)) || |
+ ((c >= 0xF8) && (c <= 0x2FF)) || |
+ ((c >= 0x300) && (c <= 0x36F)) || /* !start */ |
+ ((c >= 0x370) && (c <= 0x37D)) || |
+ ((c >= 0x37F) && (c <= 0x1FFF)) || |
+ ((c >= 0x200C) && (c <= 0x200D)) || |
+ ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ |
+ ((c >= 0x2070) && (c <= 0x218F)) || |
+ ((c >= 0x2C00) && (c <= 0x2FEF)) || |
+ ((c >= 0x3001) && (c <= 0xD7FF)) || |
+ ((c >= 0xF900) && (c <= 0xFDCF)) || |
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
+ ((c >= 0x10000) && (c <= 0xEFFFF)))) |
+ return(1); |
+ } else { |
+ if ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
+ (c == '.') || (c == '-') || |
+ (c == '_') || (c == ':') || |
+ (IS_COMBINING(c)) || |
+ (IS_EXTENDER(c))) |
+ return(1); |
+ } |
+ return(0); |
+} |
+ |
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, |
int *len, int *alloc, int normalize); |
+static const xmlChar * |
+xmlParseNameComplex(xmlParserCtxtPtr ctxt) { |
+ int len = 0, l; |
+ int c; |
+ int count = 0; |
+ |
+#ifdef DEBUG |
+ nbParseNameComplex++; |
+#endif |
+ |
+ /* |
+ * Slow path: scan the name one character at a time (CUR_CHAR/NEXTL) |
+ */ |
+ GROW; |
+ c = CUR_CHAR(l); |
+ if ((ctxt->options & XML_PARSE_OLD10) == 0) { |
+ /* |
+ * Use the new checks of production [4] [4a] and [5] of the |
+ * Update 5 of XML-1.0 |
+ */ |
+ if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
+ (!(((c >= 'a') && (c <= 'z')) || |
+ ((c >= 'A') && (c <= 'Z')) || |
+ (c == '_') || (c == ':') || |
+ ((c >= 0xC0) && (c <= 0xD6)) || |
+ ((c >= 0xD8) && (c <= 0xF6)) || |
+ ((c >= 0xF8) && (c <= 0x2FF)) || |
+ ((c >= 0x370) && (c <= 0x37D)) || |
+ ((c >= 0x37F) && (c <= 0x1FFF)) || |
+ ((c >= 0x200C) && (c <= 0x200D)) || |
+ ((c >= 0x2070) && (c <= 0x218F)) || |
+ ((c >= 0x2C00) && (c <= 0x2FEF)) || |
+ ((c >= 0x3001) && (c <= 0xD7FF)) || |
+ ((c >= 0xF900) && (c <= 0xFDCF)) || |
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
+ ((c >= 0x10000) && (c <= 0xEFFFF))))) { |
+ return(NULL); |
+ } |
+ len += l; |
+ NEXTL(l); |
+ c = CUR_CHAR(l); |
+ while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ |
+ (((c >= 'a') && (c <= 'z')) || |
+ ((c >= 'A') && (c <= 'Z')) || |
+ ((c >= '0') && (c <= '9')) || /* !start */ |
+ (c == '_') || (c == ':') || |
+ (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ |
+ ((c >= 0xC0) && (c <= 0xD6)) || |
+ ((c >= 0xD8) && (c <= 0xF6)) || |
+ ((c >= 0xF8) && (c <= 0x2FF)) || |
+ ((c >= 0x300) && (c <= 0x36F)) || /* !start */ |
+ ((c >= 0x370) && (c <= 0x37D)) || |
+ ((c >= 0x37F) && (c <= 0x1FFF)) || |
+ ((c >= 0x200C) && (c <= 0x200D)) || |
+ ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ |
+ ((c >= 0x2070) && (c <= 0x218F)) || |
+ ((c >= 0x2C00) && (c <= 0x2FEF)) || |
+ ((c >= 0x3001) && (c <= 0xD7FF)) || |
+ ((c >= 0xF900) && (c <= 0xFDCF)) || |
+ ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
+ ((c >= 0x10000) && (c <= 0xEFFFF)) |
+ )) { |
+ if (count++ > 100) { |
+ count = 0; |
+ GROW; |
+ } |
+ len += l; |
+ NEXTL(l); |
+ c = CUR_CHAR(l); |
+ } |
+ } else { |
+ if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
+ (!IS_LETTER(c) && (c != '_') && |
+ (c != ':'))) { |
+ return(NULL); |
+ } |
+ len += l; |
+ NEXTL(l); |
+ c = CUR_CHAR(l); |
+ |
+ while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
+ ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
+ (c == '.') || (c == '-') || |
+ (c == '_') || (c == ':') || |
+ (IS_COMBINING(c)) || |
+ (IS_EXTENDER(c)))) { |
+ if (count++ > 100) { |
+ count = 0; |
+ GROW; |
+ } |
+ len += l; |
+ NEXTL(l); |
+ c = CUR_CHAR(l); |
+ } |
+ } |
+ if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) |
+ return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); |
+ return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
+} |
+ |
/** |
* xmlParseName: |
* @ctxt: an XML parser context |
@@ -2793,6 +3156,10 @@ xmlParseName(xmlParserCtxtPtr ctxt) { |
GROW; |
+#ifdef DEBUG |
+ nbParseName++; |
+#endif |
+ |
/* |
* Accelerator for simple ASCII names |
*/ |
@@ -2818,9 +3185,97 @@ xmlParseName(xmlParserCtxtPtr ctxt) { |
return(ret); |
} |
} |
+ /* accelerator for special cases */ |
return(xmlParseNameComplex(ctxt)); |
} |
+static const xmlChar * |
+xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { |
+ int len = 0, l; |
+ int c; |
+ int count = 0; |
+ |
+#ifdef DEBUG |
+ nbParseNCNameComplex++; |
+#endif |
+ |
+ /* |
+ * Slow path: scan one character at a time; ':' is never accepted |
+ */ |
+ GROW; |
+ c = CUR_CHAR(l); |
+ if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
+ (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { |
+ return(NULL); |
+ } |
+ |
+ while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
+ (xmlIsNameChar(ctxt, c) && (c != ':'))) { |
+ if (count++ > 100) { |
+ count = 0; |
+ GROW; |
+ } |
+ len += l; |
+ NEXTL(l); |
+ c = CUR_CHAR(l); |
+ } |
+ return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
+} |
+ |
+/** |
+ * xmlParseNCName: |
+ * @ctxt: an XML parser context |
+ * |
+ * parse an XML NCName (a name without ':'), trying an ASCII fast path |
+ * first and falling back to xmlParseNCNameComplex() otherwise. |
+ * |
+ * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | |
+ * CombiningChar | Extender |
+ * |
+ * [5NS] NCName ::= (Letter | '_') (NCNameChar)* |
+ * |
+ * Returns the Name parsed or NULL |
+ */ |
+ |
+static const xmlChar * |
+xmlParseNCName(xmlParserCtxtPtr ctxt) { |
+ const xmlChar *in; |
+ const xmlChar *ret; |
+ int count = 0; |
+ |
+#ifdef DEBUG |
+ nbParseNCName++; |
+#endif |
+ |
+ /* |
+ * Accelerator for simple ASCII names |
+ */ |
+ in = ctxt->input->cur; |
+ if (((*in >= 0x61) && (*in <= 0x7A)) || |
+ ((*in >= 0x41) && (*in <= 0x5A)) || |
+ (*in == '_')) { |
+ in++; |
+ while (((*in >= 0x61) && (*in <= 0x7A)) || |
+ ((*in >= 0x41) && (*in <= 0x5A)) || |
+ ((*in >= 0x30) && (*in <= 0x39)) || |
+ (*in == '_') || (*in == '-') || |
+ (*in == '.')) |
+ in++; |
+ if ((*in > 0) && (*in < 0x80)) { |
+ count = in - ctxt->input->cur; |
+ ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); |
+ ctxt->input->cur = in; |
+ ctxt->nbChars += count; |
+ ctxt->input->col += count; |
+ if (ret == NULL) { |
+ xmlErrMemory(ctxt, NULL); |
+ } |
+ return(ret); |
+ } |
+ } |
+ return(xmlParseNCNameComplex(ctxt)); |
+} |
+ |
/** |
* xmlParseNameAndCompare: |
* @ctxt: an XML parser context |
@@ -2839,15 +3294,15 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { |
const xmlChar *ret; |
GROW; |
- |
+ |
in = ctxt->input->cur; |
while (*in != 0 && *in == *cmp) { |
- ++in; |
+ ++in; |
++cmp; |
ctxt->input->col++; |
} |
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { |
- /* success */ |
+ /* success */ |
ctxt->input->cur = in; |
return (const xmlChar*) 1; |
} |
@@ -2860,42 +3315,6 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { |
return ret; |
} |
-static const xmlChar * |
-xmlParseNameComplex(xmlParserCtxtPtr ctxt) { |
- int len = 0, l; |
- int c; |
- int count = 0; |
- |
- /* |
- * Handler for more complex cases |
- */ |
- GROW; |
- c = CUR_CHAR(l); |
- if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
- (!IS_LETTER(c) && (c != '_') && |
- (c != ':'))) { |
- return(NULL); |
- } |
- |
- while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
- ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
- (c == '.') || (c == '-') || |
- (c == '_') || (c == ':') || |
- (IS_COMBINING(c)) || |
- (IS_EXTENDER(c)))) { |
- if (count++ > 100) { |
- count = 0; |
- GROW; |
- } |
- len += l; |
- NEXTL(l); |
- c = CUR_CHAR(l); |
- } |
- if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) |
- return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); |
- return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
-} |
- |
/** |
* xmlParseStringName: |
* @ctxt: an XML parser context |
@@ -2921,17 +3340,19 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { |
int len = 0, l; |
int c; |
+#ifdef DEBUG |
+ nbParseStringName++; |
+#endif |
+ |
c = CUR_SCHAR(cur, l); |
- if (!IS_LETTER(c) && (c != '_') && |
- (c != ':')) { |
+ if (!xmlIsNameStartChar(ctxt, c)) { |
return(NULL); |
} |
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ |
- (c == '.') || (c == '-') || |
- (c == '_') || (c == ':') || |
- (IS_COMBINING(c)) || |
- (IS_EXTENDER(c))) { |
+ COPY_BUF(l,buf,len,c); |
+ cur += l; |
+ c = CUR_SCHAR(cur, l); |
+ while (xmlIsNameChar(ctxt, c)) { |
COPY_BUF(l,buf,len,c); |
cur += l; |
c = CUR_SCHAR(cur, l); |
@@ -2942,19 +3363,14 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { |
*/ |
xmlChar *buffer; |
int max = len * 2; |
- |
+ |
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); |
if (buffer == NULL) { |
xmlErrMemory(ctxt, NULL); |
return(NULL); |
} |
memcpy(buffer, buf, len); |
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
- /* test bigentname.xml */ |
- (c == '.') || (c == '-') || |
- (c == '_') || (c == ':') || |
- (IS_COMBINING(c)) || |
- (IS_EXTENDER(c))) { |
+ while (xmlIsNameChar(ctxt, c)) { |
if (len + 10 > max) { |
xmlChar *tmp; |
max *= 2; |
@@ -2983,7 +3399,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { |
/** |
* xmlParseNmtoken: |
* @ctxt: an XML parser context |
- * |
+ * |
* parse an XML Nmtoken. |
* |
* [7] Nmtoken ::= (NameChar)+ |
@@ -3000,14 +3416,14 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { |
int c; |
int count = 0; |
+#ifdef DEBUG |
+ nbParseNmToken++; |
+#endif |
+ |
GROW; |
c = CUR_CHAR(l); |
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ |
- (c == '.') || (c == '-') || |
- (c == '_') || (c == ':') || |
- (IS_COMBINING(c)) || |
- (IS_EXTENDER(c))) { |
+ while (xmlIsNameChar(ctxt, c)) { |
if (count++ > 100) { |
count = 0; |
GROW; |
@@ -3022,18 +3438,14 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { |
*/ |
xmlChar *buffer; |
int max = len * 2; |
- |
+ |
buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); |
if (buffer == NULL) { |
xmlErrMemory(ctxt, NULL); |
return(NULL); |
} |
memcpy(buffer, buf, len); |
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ |
- (c == '.') || (c == '-') || |
- (c == '_') || (c == ':') || |
- (IS_COMBINING(c)) || |
- (IS_EXTENDER(c))) { |
+ while (xmlIsNameChar(ctxt, c)) { |
if (count++ > 100) { |
count = 0; |
GROW; |
@@ -3262,7 +3674,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
if (val == '&') { |
if (ctxt->replaceEntities) { |
if (len > buf_size - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, 10); |
} |
buf[len++] = '&'; |
} else { |
@@ -3271,7 +3683,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
* called by the attribute() function in SAX.c |
*/ |
if (len > buf_size - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, 10); |
} |
buf[len++] = '&'; |
buf[len++] = '#'; |
@@ -3281,16 +3693,19 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
} |
} else if (val != 0) { |
if (len > buf_size - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, 10); |
} |
len += xmlCopyChar(0, &buf[len], val); |
} |
} else { |
ent = xmlParseEntityRef(ctxt); |
+ ctxt->nbentities++; |
+ if (ent != NULL) |
+ ctxt->nbentities += ent->owner; |
if ((ent != NULL) && |
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
if (len > buf_size - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, 10); |
} |
if ((ctxt->replaceEntities == 0) && |
(ent->content[0] == '&')) { |
@@ -3311,9 +3726,14 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
if (rep != NULL) { |
current = rep; |
while (*current != 0) { /* non input consuming */ |
- buf[len++] = *current++; |
+ if ((*current == 0xD) || (*current == 0xA) || |
+ (*current == 0x9)) { |
+ buf[len++] = 0x20; |
+ current++; |
+ } else |
+ buf[len++] = *current++; |
if (len > buf_size - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, 10); |
} |
} |
xmlFree(rep); |
@@ -3321,7 +3741,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
} |
} else { |
if (len > buf_size - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, 10); |
} |
if (ent->content != NULL) |
buf[len++] = ent->content[0]; |
@@ -3349,7 +3769,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
*/ |
buf[len++] = '&'; |
while (len > buf_size - i - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, i + 10); |
} |
for (;i > 0;i--) |
buf[len++] = *cur++; |
@@ -3361,8 +3781,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
if ((len != 0) || (!normalize)) { |
if ((!normalize) || (!in_space)) { |
COPY_BUF(l,buf,len,0x20); |
- if (len > buf_size - 10) { |
- growBuffer(buf); |
+ while (len > buf_size - 10) { |
+ growBuffer(buf, 10); |
} |
} |
in_space = 1; |
@@ -3371,7 +3791,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
in_space = 0; |
COPY_BUF(l,buf,len,c); |
if (len > buf_size - 10) { |
- growBuffer(buf); |
+ growBuffer(buf, 10); |
} |
} |
NEXTL(l); |
@@ -3601,7 +4021,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { |
return(buf); |
} |
-void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); |
+static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); |
/* |
* used for the test in the inner loop of the char data testing |
@@ -3765,6 +4185,9 @@ get_more: |
line = ctxt->input->line; |
col = ctxt->input->col; |
} |
+ /* something really bad happened in the SAX callback */ |
+ if (ctxt->instate != XML_PARSER_CONTENT) |
+ return; |
} |
ctxt->input->cur = in; |
if (*in == 0xD) { |
@@ -3803,7 +4226,7 @@ get_more: |
* of xmlParseCharData() when the parsing requires handling |
* of non-ASCII characters. |
*/ |
-void |
+static void |
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { |
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; |
int nbchar = 0; |
@@ -3845,6 +4268,9 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { |
} |
} |
nbchar = 0; |
+ /* something really bad happened in the SAX callback */ |
+ if (ctxt->instate != XML_PARSER_CONTENT) |
+ return; |
} |
count++; |
if (count > 50) { |
@@ -3983,8 +4409,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { |
int q, ql; |
int r, rl; |
int cur, l; |
- xmlParserInputPtr input = ctxt->input; |
int count = 0; |
+ int inputid; |
+ |
+ inputid = ctxt->input->id; |
if (buf == NULL) { |
len = 0; |
@@ -4066,7 +4494,7 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { |
"xmlParseComment: invalid xmlChar value %d\n", |
cur); |
} else { |
- if (input != ctxt->input) { |
+ if (inputid != ctxt->input->id) { |
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, |
"Comment doesn't start and stop in the same entity\n"); |
} |
@@ -4102,15 +4530,16 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { |
xmlParserInputState state; |
const xmlChar *in; |
int nbchar = 0, ccol; |
+ int inputid; |
/* |
* Check that there is a comment right here. |
*/ |
if ((RAW != '<') || (NXT(1) != '!') || |
(NXT(2) != '-') || (NXT(3) != '-')) return; |
- |
state = ctxt->instate; |
ctxt->instate = XML_PARSER_COMMENT; |
+ inputid = ctxt->input->id; |
SKIP(4); |
SHRINK; |
GROW; |
@@ -4201,6 +4630,10 @@ get_more: |
if (*in == '-') { |
if (in[1] == '-') { |
if (in[2] == '>') { |
+ if (ctxt->input->id != inputid) { |
+ xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, |
+ "comment doesn't start and stop in the same entity\n"); |
+ } |
SKIP(3); |
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && |
(!ctxt->disableSAX)) { |
@@ -4274,6 +4707,10 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { |
"xmlParsePITarget: invalid name prefix 'xml'\n", |
NULL, NULL); |
} |
+ if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { |
+ xmlNsErr(ctxt, XML_NS_ERR_COLON, |
+ "colon are forbidden from PI names '%s'\n", name, NULL, NULL); |
+ } |
return(name); |
} |
@@ -4517,6 +4954,11 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { |
"Space required after the NOTATION name'\n"); |
return; |
} |
+ if (xmlStrchr(name, ':') != NULL) { |
+ xmlNsErr(ctxt, XML_NS_ERR_COLON, |
+ "colon are forbidden from notation names '%s'\n", |
+ name, NULL, NULL); |
+ } |
SKIP_BLANKS; |
/* |
@@ -4601,6 +5043,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { |
"xmlParseEntityDecl: no name\n"); |
return; |
} |
+ if (xmlStrchr(name, ':') != NULL) { |
+ xmlNsErr(ctxt, XML_NS_ERR_COLON, |
+ "colon are forbidden from entities names '%s'\n", |
+ name, NULL, NULL); |
+ } |
skipped = SKIP_BLANKS; |
if (skipped == 0) { |
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, |
@@ -4676,6 +5123,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { |
xmlErrMemory(ctxt, "New Doc failed"); |
return; |
} |
+ ctxt->myDoc->properties = XML_DOC_INTERNAL; |
} |
if (ctxt->myDoc->intSubset == NULL) |
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, |
@@ -4748,6 +5196,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { |
xmlErrMemory(ctxt, "New Doc failed"); |
return; |
} |
+ ctxt->myDoc->properties = XML_DOC_INTERNAL; |
} |
if (ctxt->myDoc->intSubset == NULL) |
@@ -4886,7 +5335,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { |
xmlEnumerationPtr |
xmlParseNotationType(xmlParserCtxtPtr ctxt) { |
const xmlChar *name; |
- xmlEnumerationPtr ret = NULL, last = NULL, cur; |
+ xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; |
if (RAW != '(') { |
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); |
@@ -4901,24 +5350,38 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) { |
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
"Name expected in NOTATION declaration\n"); |
xmlFreeEnumeration(ret); |
- return(NULL); |
+ return(NULL); |
} |
- cur = xmlCreateEnumeration(name); |
- if (cur == NULL) { |
- xmlFreeEnumeration(ret); |
- return(NULL); |
- } |
- if (last == NULL) ret = last = cur; |
- else { |
- last->next = cur; |
- last = cur; |
+ tmp = ret; |
+ while (tmp != NULL) { |
+ if (xmlStrEqual(name, tmp->name)) { |
+ xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, |
+ "standalone: attribute notation value token %s duplicated\n", |
+ name, NULL); |
+ if (!xmlDictOwns(ctxt->dict, name)) |
+ xmlFree((xmlChar *) name); |
+ break; |
+ } |
+ tmp = tmp->next; |
+ } |
+ if (tmp == NULL) { |
+ cur = xmlCreateEnumeration(name); |
+ if (cur == NULL) { |
+ xmlFreeEnumeration(ret); |
+ return(NULL); |
+ } |
+ if (last == NULL) ret = last = cur; |
+ else { |
+ last->next = cur; |
+ last = cur; |
+ } |
} |
SKIP_BLANKS; |
} while (RAW == '|'); |
if (RAW != ')') { |
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); |
xmlFreeEnumeration(ret); |
- return(NULL); |
+ return(NULL); |
} |
NEXT; |
return(ret); |
@@ -4942,7 +5405,7 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) { |
xmlEnumerationPtr |
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { |
xmlChar *name; |
- xmlEnumerationPtr ret = NULL, last = NULL, cur; |
+ xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; |
if (RAW != '(') { |
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); |
@@ -4957,16 +5420,31 @@ xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { |
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); |
return(ret); |
} |
- cur = xmlCreateEnumeration(name); |
- xmlFree(name); |
- if (cur == NULL) { |
- xmlFreeEnumeration(ret); |
- return(NULL); |
- } |
- if (last == NULL) ret = last = cur; |
- else { |
- last->next = cur; |
- last = cur; |
+ tmp = ret; |
+ while (tmp != NULL) { |
+ if (xmlStrEqual(name, tmp->name)) { |
+ xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, |
+ "standalone: attribute enumeration value token %s duplicated\n", |
+ name, NULL); |
+ if (!xmlDictOwns(ctxt->dict, name)) |
+ xmlFree(name); |
+ break; |
+ } |
+ tmp = tmp->next; |
+ } |
+ if (tmp == NULL) { |
+ cur = xmlCreateEnumeration(name); |
+ if (!xmlDictOwns(ctxt->dict, name)) |
+ xmlFree(name); |
+ if (cur == NULL) { |
+ xmlFreeEnumeration(ret); |
+ return(NULL); |
+ } |
+ if (last == NULL) ret = last = cur; |
+ else { |
+ last->next = cur; |
+ last = cur; |
+ } |
} |
SKIP_BLANKS; |
} while (RAW == '|'); |
@@ -5213,8 +5691,9 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { |
} |
if (RAW == '>') { |
if (input != ctxt->input) { |
- xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, |
- "Attribute list declaration doesn't start and stop in the same entity\n"); |
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, |
+ "Attribute list declaration doesn't start and stop in the same entity\n", |
+ NULL, NULL); |
} |
NEXT; |
} |
@@ -5254,7 +5733,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { |
if ((ctxt->validate) && (ctxt->input->id != inputchk)) { |
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, |
"Element content declaration doesn't start and stop in the same entity\n", |
- NULL); |
+ NULL, NULL); |
} |
NEXT; |
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); |
@@ -5308,11 +5787,12 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { |
if (cur->c2 != NULL) |
cur->c2->parent = cur; |
} |
- ret->ocur = XML_ELEMENT_CONTENT_MULT; |
+ if (ret != NULL) |
+ ret->ocur = XML_ELEMENT_CONTENT_MULT; |
if ((ctxt->validate) && (ctxt->input->id != inputchk)) { |
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, |
"Element content declaration doesn't start and stop in the same entity\n", |
- NULL); |
+ NULL, NULL); |
} |
SKIP(2); |
} else { |
@@ -5328,9 +5808,10 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { |
} |
/** |
- * xmlParseElementChildrenContentDecl: |
+ * xmlParseElementChildrenContentDeclPriv: |
* @ctxt: an XML parser context |
* @inputchk: the input used for the current entity, needed for boundary checks |
+ * @depth: the level of recursion |
* |
* parse the declaration for a Mixed Element content |
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl |
@@ -5358,17 +5839,19 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { |
* Returns the tree of xmlElementContentPtr describing the element |
* hierarchy. |
*/ |
-xmlElementContentPtr |
-xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { |
+static xmlElementContentPtr |
+xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, |
+ int depth) { |
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; |
const xmlChar *elem; |
xmlChar type = 0; |
- if (ctxt->depth > 128) { |
+ if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || |
+ (depth > 2048)) { |
xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, |
- "xmlParseElementChildrenContentDecl : depth %d too deep\n", |
- ctxt->depth); |
- return(NULL); |
+"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", |
+ depth); |
+ return(NULL); |
} |
SKIP_BLANKS; |
GROW; |
@@ -5378,9 +5861,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { |
/* Recurse on first child */ |
NEXT; |
SKIP_BLANKS; |
- ctxt->depth++; |
- cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); |
- ctxt->depth--; |
+ cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, |
+ depth + 1); |
SKIP_BLANKS; |
GROW; |
} else { |
@@ -5512,9 +5994,8 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { |
/* Recurse on second child */ |
NEXT; |
SKIP_BLANKS; |
- ctxt->depth++; |
- last = xmlParseElementChildrenContentDecl(ctxt, inputid); |
- ctxt->depth--; |
+ last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, |
+ depth + 1); |
SKIP_BLANKS; |
} else { |
elem = xmlParseName(ctxt); |
@@ -5554,7 +6035,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { |
if ((ctxt->validate) && (ctxt->input->id != inputchk)) { |
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, |
"Element content declaration doesn't start and stop in the same entity\n", |
- NULL); |
+ NULL, NULL); |
} |
NEXT; |
if (RAW == '?') { |
@@ -5625,6 +6106,42 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { |
} |
/** |
+ * xmlParseElementChildrenContentDecl: |
+ * @ctxt: an XML parser context |
+ * @inputchk: the input used for the current entity, needed for boundary checks |
+ * |
+ * parse the declaration for an element children content model |
+ * The leading '(' and spaces have been skipped in xmlParseElementContentDecl |
+ * |
+ * [47] children ::= (choice | seq) ('?' | '*' | '+')? |
+ * |
+ * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? |
+ * |
+ * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' |
+ * |
+ * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' |
+ * |
+ * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] |
+ * TODO Parameter-entity replacement text must be properly nested |
+ * with parenthesized groups. That is to say, if either of the |
+ * opening or closing parentheses in a choice, seq, or Mixed |
+ * construct is contained in the replacement text for a parameter |
+ * entity, both must be contained in the same replacement text. For |
+ * interoperability, if a parameter-entity reference appears in a |
+ * choice, seq, or Mixed construct, its replacement text should not |
+ * be empty, and neither the first nor last non-blank character of |
+ * the replacement text should be a connector (| or ,). |
+ * |
+ * Returns the tree of xmlElementContentPtr describing the element |
+ * hierarchy. |
+ */ |
+xmlElementContentPtr |
+xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { |
+ /* stub left for API/ABI compat */ |
+ return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); |
+} |
+ |
+/** |
* xmlParseElementContentDecl: |
* @ctxt: an XML parser context |
* @name: the name of the element being defined. |
@@ -5660,7 +6177,7 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, |
tree = xmlParseElementMixedContentDecl(ctxt, inputid); |
res = XML_ELEMENT_TYPE_MIXED; |
} else { |
- tree = xmlParseElementChildrenContentDecl(ctxt, inputid); |
+ tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); |
res = XML_ELEMENT_TYPE_ELEMENT; |
} |
SKIP_BLANKS; |
@@ -5796,6 +6313,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { |
static void |
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { |
+ int id = ctxt->input->id; |
+ |
SKIP(3); |
SKIP_BLANKS; |
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { |
@@ -5804,6 +6323,11 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { |
if (RAW != '[') { |
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); |
} else { |
+ if (ctxt->input->id != id) { |
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, |
+ "All markup of the conditional section is not in the same entity\n", |
+ NULL, NULL); |
+ } |
NEXT; |
} |
if (xmlParserDebugEntities) { |
@@ -5859,6 +6383,11 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { |
if (RAW != '[') { |
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); |
} else { |
+ if (ctxt->input->id != id) { |
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, |
+ "All markup of the conditional section is not in the same entity\n", |
+ NULL, NULL); |
+ } |
NEXT; |
} |
if (xmlParserDebugEntities) { |
@@ -5915,6 +6444,11 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { |
if (RAW == 0) { |
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); |
} else { |
+ if (ctxt->input->id != id) { |
+ xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, |
+ "All markup of the conditional section is not in the same entity\n", |
+ NULL, NULL); |
+ } |
SKIP(3); |
} |
} |
@@ -5993,12 +6527,10 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { |
/** |
* xmlParseTextDecl: |
* @ctxt: an XML parser context |
- * |
+ * |
* parse an XML declaration header for external entities |
* |
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' |
- * |
- * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? |
*/ |
void |
@@ -6109,6 +6641,11 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, |
} |
if (ctxt->myDoc == NULL) { |
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); |
+ if (ctxt->myDoc == NULL) { |
+ xmlErrMemory(ctxt, "New Doc failed"); |
+ return; |
+ } |
+ ctxt->myDoc->properties = XML_DOC_INTERNAL; |
} |
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) |
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); |
@@ -6152,7 +6689,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, |
/** |
* xmlParseReference: |
* @ctxt: an XML parser context |
- * |
+ * |
* parse and handle entity references in content, depending on the SAX |
* interface, this may end-up in a call to character() if this is a |
* CharRef, a predefined entity, if there is no reference() callback. |
@@ -6164,14 +6701,23 @@ void |
xmlParseReference(xmlParserCtxtPtr ctxt) { |
xmlEntityPtr ent; |
xmlChar *val; |
- if (RAW != '&') return; |
+ int was_checked; |
+ xmlNodePtr list = NULL; |
+ xmlParserErrors ret = XML_ERR_OK; |
+ |
+ if (RAW != '&') |
+ return; |
+ |
+ /* |
+ * Simple case of a CharRef |
+ */ |
if (NXT(1) == '#') { |
int i = 0; |
xmlChar out[10]; |
int hex = NXT(2); |
int value = xmlParseCharRef(ctxt); |
- |
+ |
if (value == 0) |
return; |
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { |
@@ -6205,360 +6751,342 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { |
(!ctxt->disableSAX)) |
ctxt->sax->characters(ctxt->userData, out, i); |
} |
- } else { |
- int was_checked; |
+ return; |
+ } |
- ent = xmlParseEntityRef(ctxt); |
- if (ent == NULL) return; |
- if (!ctxt->wellFormed) |
- return; |
- was_checked = ent->checked; |
- if ((ent->name != NULL) && |
- (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { |
- xmlNodePtr list = NULL; |
- xmlParserErrors ret = XML_ERR_OK; |
+ /* |
+ * We are seeing an entity reference |
+ */ |
+ ent = xmlParseEntityRef(ctxt); |
+ if (ent == NULL) return; |
+ if (!ctxt->wellFormed) |
+ return; |
+ was_checked = ent->checked; |
+ /* special case of predefined entities */ |
+ if ((ent->name == NULL) || |
+ (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
+ val = ent->content; |
+ if (val == NULL) return; |
+ /* |
+ * inline the entity. |
+ */ |
+ if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && |
+ (!ctxt->disableSAX)) |
+ ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); |
+ return; |
+ } |
- /* |
- * The first reference to the entity trigger a parsing phase |
- * where the ent->children is filled with the result from |
- * the parsing. |
- */ |
- if (ent->checked == 0) { |
- xmlChar *value; |
+ /* |
+ * The first reference to the entity triggers a parsing phase |
+ * where the ent->children is filled with the result from |
+ * the parsing. |
+ */ |
+ if (ent->checked == 0) { |
+ unsigned long oldnbent = ctxt->nbentities; |
- value = ent->content; |
+ /* |
+ * This is a bit hackish but this seems the best |
+ * way to make sure both SAX and DOM entity support |
+ * behaves okay. |
+ */ |
+ void *user_data; |
+ if (ctxt->userData == ctxt) |
+ user_data = NULL; |
+ else |
+ user_data = ctxt->userData; |
- /* |
- * Check that this entity is well formed |
- */ |
- if ((value != NULL) && (value[0] != 0) && |
- (value[1] == 0) && (value[0] == '<') && |
- (xmlStrEqual(ent->name, BAD_CAST "lt"))) { |
- /* |
- * DONE: get definite answer on this !!! |
- * Lots of entity decls are used to declare a single |
- * char |
- * <!ENTITY lt "<"> |
- * Which seems to be valid since |
- * 2.4: The ampersand character (&) and the left angle |
- * bracket (<) may appear in their literal form only |
- * when used ... They are also legal within the literal |
- * entity value of an internal entity declaration;i |
- * see "4.3.2 Well-Formed Parsed Entities". |
- * IMHO 2.4 and 4.3.2 are directly in contradiction. |
- * Looking at the OASIS test suite and James Clark |
- * tests, this is broken. However the XML REC uses |
- * it. Is the XML REC not well-formed ???? |
- * This is a hack to avoid this problem |
- * |
- * ANSWER: since lt gt amp .. are already defined, |
- * this is a redefinition and hence the fact that the |
- * content is not well balanced is not a Wf error, this |
- * is lousy but acceptable. |
- */ |
- list = xmlNewDocText(ctxt->myDoc, value); |
- if (list != NULL) { |
- if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && |
- (ent->children == NULL)) { |
- ent->children = list; |
- ent->last = list; |
- ent->owner = 1; |
- list->parent = (xmlNodePtr) ent; |
- } else { |
- xmlFreeNodeList(list); |
- } |
- } else if (list != NULL) { |
- xmlFreeNodeList(list); |
- } |
- } else { |
- /* |
- * 4.3.2: An internal general parsed entity is well-formed |
- * if its replacement text matches the production labeled |
- * content. |
- */ |
+ /* |
+ * Check that this entity is well formed |
+ * 4.3.2: An internal general parsed entity is well-formed |
+ * if its replacement text matches the production labeled |
+ * content. |
+ */ |
+ if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { |
+ ctxt->depth++; |
+ ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, |
+ user_data, &list); |
+ ctxt->depth--; |
+ |
+ } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { |
+ ctxt->depth++; |
+ ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, |
+ user_data, ctxt->depth, ent->URI, |
+ ent->ExternalID, &list); |
+ ctxt->depth--; |
+ } else { |
+ ret = XML_ERR_ENTITY_PE_INTERNAL; |
+ xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, |
+ "invalid entity type found\n", NULL); |
+ } |
- void *user_data; |
+ /* |
+ * Store the number of entities needing parsing for this entity |
+ * content and perform the checks |
+ */ |
+ ent->checked = ctxt->nbentities - oldnbent; |
+ if (ret == XML_ERR_ENTITY_LOOP) { |
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
+ xmlFreeNodeList(list); |
+ return; |
+ } |
+ if (xmlParserEntityCheck(ctxt, 0, ent)) { |
+ xmlFreeNodeList(list); |
+ return; |
+ } |
+ |
+ if ((ret == XML_ERR_OK) && (list != NULL)) { |
+ if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || |
+ (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& |
+ (ent->children == NULL)) { |
+ ent->children = list; |
+ if (ctxt->replaceEntities) { |
/* |
- * This is a bit hackish but this seems the best |
- * way to make sure both SAX and DOM entity support |
- * behaves okay. |
+ * Prune it directly in the generated document |
+ * except for single text nodes. |
*/ |
- if (ctxt->userData == ctxt) |
- user_data = NULL; |
- else |
- user_data = ctxt->userData; |
- |
- if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { |
- ctxt->depth++; |
- ret = xmlParseBalancedChunkMemoryInternal(ctxt, |
- value, user_data, &list); |
- ctxt->depth--; |
- } else if (ent->etype == |
- XML_EXTERNAL_GENERAL_PARSED_ENTITY) { |
- ctxt->depth++; |
- ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, |
- ctxt->sax, user_data, ctxt->depth, |
- ent->URI, ent->ExternalID, &list); |
- ctxt->depth--; |
+ if (((list->type == XML_TEXT_NODE) && |
+ (list->next == NULL)) || |
+ (ctxt->parseMode == XML_PARSE_READER)) { |
+ list->parent = (xmlNodePtr) ent; |
+ list = NULL; |
+ ent->owner = 1; |
} else { |
- ret = XML_ERR_ENTITY_PE_INTERNAL; |
- xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, |
- "invalid entity type found\n", NULL); |
- } |
- if (ret == XML_ERR_ENTITY_LOOP) { |
- xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
- return; |
- } else if ((ret == XML_ERR_OK) && (list != NULL)) { |
- if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || |
- (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& |
- (ent->children == NULL)) { |
- ent->children = list; |
- if (ctxt->replaceEntities) { |
- /* |
- * Prune it directly in the generated document |
- * except for single text nodes. |
- */ |
- if (((list->type == XML_TEXT_NODE) && |
- (list->next == NULL)) || |
- (ctxt->parseMode == XML_PARSE_READER)) { |
- list->parent = (xmlNodePtr) ent; |
- list = NULL; |
- ent->owner = 1; |
- } else { |
- ent->owner = 0; |
- while (list != NULL) { |
- list->parent = (xmlNodePtr) ctxt->node; |
- list->doc = ctxt->myDoc; |
- if (list->next == NULL) |
- ent->last = list; |
- list = list->next; |
- } |
- list = ent->children; |
+ ent->owner = 0; |
+ while (list != NULL) { |
+ list->parent = (xmlNodePtr) ctxt->node; |
+ list->doc = ctxt->myDoc; |
+ if (list->next == NULL) |
+ ent->last = list; |
+ list = list->next; |
+ } |
+ list = ent->children; |
#ifdef LIBXML_LEGACY_ENABLED |
- if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) |
- xmlAddEntityReference(ent, list, NULL); |
+ if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) |
+ xmlAddEntityReference(ent, list, NULL); |
#endif /* LIBXML_LEGACY_ENABLED */ |
- } |
- } else { |
- ent->owner = 1; |
- while (list != NULL) { |
- list->parent = (xmlNodePtr) ent; |
- if (list->next == NULL) |
- ent->last = list; |
- list = list->next; |
- } |
- } |
- } else { |
- xmlFreeNodeList(list); |
- list = NULL; |
- } |
- } else if ((ret != XML_ERR_OK) && |
- (ret != XML_WAR_UNDECLARED_ENTITY)) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
- "Entity '%s' failed to parse\n", ent->name); |
- } else if (list != NULL) { |
- xmlFreeNodeList(list); |
- list = NULL; |
+ } |
+ } else { |
+ ent->owner = 1; |
+ while (list != NULL) { |
+ list->parent = (xmlNodePtr) ent; |
+ if (list->next == NULL) |
+ ent->last = list; |
+ list = list->next; |
} |
} |
- ent->checked = 1; |
+ } else { |
+ xmlFreeNodeList(list); |
+ list = NULL; |
} |
+ } else if ((ret != XML_ERR_OK) && |
+ (ret != XML_WAR_UNDECLARED_ENTITY)) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
+ "Entity '%s' failed to parse\n", ent->name); |
+ } else if (list != NULL) { |
+ xmlFreeNodeList(list); |
+ list = NULL; |
+ } |
+ if (ent->checked == 0) |
+ ent->checked = 1; |
+ } else if (ent->checked != 1) { |
+ ctxt->nbentities += ent->checked; |
+ } |
- if (ent->children == NULL) { |
- /* |
- * Probably running in SAX mode and the callbacks don't |
- * build the entity content. So unless we already went |
- * though parsing for first checking go though the entity |
- * content to generate callbacks associated to the entity |
- */ |
- if (was_checked == 1) { |
- void *user_data; |
- /* |
- * This is a bit hackish but this seems the best |
- * way to make sure both SAX and DOM entity support |
- * behaves okay. |
- */ |
- if (ctxt->userData == ctxt) |
- user_data = NULL; |
- else |
- user_data = ctxt->userData; |
- |
- if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { |
- ctxt->depth++; |
- ret = xmlParseBalancedChunkMemoryInternal(ctxt, |
- ent->content, user_data, NULL); |
- ctxt->depth--; |
- } else if (ent->etype == |
- XML_EXTERNAL_GENERAL_PARSED_ENTITY) { |
- ctxt->depth++; |
- ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, |
- ctxt->sax, user_data, ctxt->depth, |
- ent->URI, ent->ExternalID, NULL); |
- ctxt->depth--; |
- } else { |
- ret = XML_ERR_ENTITY_PE_INTERNAL; |
- xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, |
- "invalid entity type found\n", NULL); |
- } |
- if (ret == XML_ERR_ENTITY_LOOP) { |
- xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
- return; |
- } |
- } |
- if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && |
- (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { |
- /* |
- * Entity reference callback comes second, it's somewhat |
- * superfluous but a compatibility to historical behaviour |
- */ |
- ctxt->sax->reference(ctxt->userData, ent->name); |
- } |
- return; |
+ /* |
+ * Now that the entity content has been gathered |
+ * provide it to the application, this can take different forms based |
+ * on the parsing modes. |
+ */ |
+ if (ent->children == NULL) { |
+ /* |
+ * Probably running in SAX mode and the callbacks don't |
+ * build the entity content. So unless we already went |
+ * through parsing for first checking go through the entity |
+ * content to generate callbacks associated to the entity |
+ */ |
+ if (was_checked != 0) { |
+ void *user_data; |
+ /* |
+ * This is a bit hackish but this seems the best |
+ * way to make sure both SAX and DOM entity support |
+ * behaves okay. |
+ */ |
+ if (ctxt->userData == ctxt) |
+ user_data = NULL; |
+ else |
+ user_data = ctxt->userData; |
+ |
+ if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { |
+ ctxt->depth++; |
+ ret = xmlParseBalancedChunkMemoryInternal(ctxt, |
+ ent->content, user_data, NULL); |
+ ctxt->depth--; |
+ } else if (ent->etype == |
+ XML_EXTERNAL_GENERAL_PARSED_ENTITY) { |
+ ctxt->depth++; |
+ ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, |
+ ctxt->sax, user_data, ctxt->depth, |
+ ent->URI, ent->ExternalID, NULL); |
+ ctxt->depth--; |
+ } else { |
+ ret = XML_ERR_ENTITY_PE_INTERNAL; |
+ xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, |
+ "invalid entity type found\n", NULL); |
} |
- if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && |
- (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { |
- /* |
- * Create a node. |
- */ |
- ctxt->sax->reference(ctxt->userData, ent->name); |
+ if (ret == XML_ERR_ENTITY_LOOP) { |
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
return; |
} |
- if ((ctxt->replaceEntities) || (ent->children == NULL)) { |
+ } |
+ if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && |
+ (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { |
+ /* |
+ * Entity reference callback comes second, it's somewhat |
+ * superfluous but a compatibility to historical behaviour |
+ */ |
+ ctxt->sax->reference(ctxt->userData, ent->name); |
+ } |
+ return; |
+ } |
+ |
+ /* |
+ * If we didn't get any children for the entity being built |
+ */ |
+ if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && |
+ (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { |
+ /* |
+ * Create a node. |
+ */ |
+ ctxt->sax->reference(ctxt->userData, ent->name); |
+ return; |
+ } |
+ |
+ if ((ctxt->replaceEntities) || (ent->children == NULL)) { |
+ /* |
+ * There is a problem on the handling of _private for entities |
+ * (bug 155816): Should we copy the content of the field from |
+ * the entity (possibly overwriting some value set by the user |
+ * when a copy is created), should we leave it alone, or should |
+ * we try to take care of different situations? The problem |
+ * is exacerbated by the usage of this field by the xmlReader. |
+ * To fix this bug, we look at _private on the created node |
+ * and, if it's NULL, we copy in whatever was in the entity. |
+ * If it's not NULL we leave it alone. This is somewhat of a |
+ * hack - maybe we should have further tests to determine |
+ * what to do. |
+ */ |
+ if ((ctxt->node != NULL) && (ent->children != NULL)) { |
+ /* |
+ * Seems we are generating the DOM content, do |
+ * a simple tree copy for all references except the first |
+ * In the first occurrence list contains the replacement. |
+ * parseMode == XML_PARSE_READER means we are operating on the Reader |
+ * and since nodes are discarded we must copy all the time. |
+ */ |
+ if (((list == NULL) && (ent->owner == 0)) || |
+ (ctxt->parseMode == XML_PARSE_READER)) { |
+ xmlNodePtr nw = NULL, cur, firstChild = NULL; |
+ |
/* |
- * There is a problem on the handling of _private for entities |
- * (bug 155816): Should we copy the content of the field from |
- * the entity (possibly overwriting some value set by the user |
- * when a copy is created), should we leave it alone, or should |
- * we try to take care of different situations? The problem |
- * is exacerbated by the usage of this field by the xmlReader. |
- * To fix this bug, we look at _private on the created node |
- * and, if it's NULL, we copy in whatever was in the entity. |
- * If it's not NULL we leave it alone. This is somewhat of a |
- * hack - maybe we should have further tests to determine |
- * what to do. |
+ * when operating on a reader, the entities definitions |
+ * are always owning the entities subtree. |
+ if (ctxt->parseMode == XML_PARSE_READER) |
+ ent->owner = 1; |
*/ |
- if ((ctxt->node != NULL) && (ent->children != NULL)) { |
- /* |
- * Seems we are generating the DOM content, do |
- * a simple tree copy for all references except the first |
- * In the first occurrence list contains the replacement. |
- * progressive == 2 means we are operating on the Reader |
- * and since nodes are discarded we must copy all the time. |
- */ |
- if (((list == NULL) && (ent->owner == 0)) || |
- (ctxt->parseMode == XML_PARSE_READER)) { |
- xmlNodePtr nw = NULL, cur, firstChild = NULL; |
+ cur = ent->children; |
+ while (cur != NULL) { |
+ nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); |
+ if (nw != NULL) { |
+ if (nw->_private == NULL) |
+ nw->_private = cur->_private; |
+ if (firstChild == NULL){ |
+ firstChild = nw; |
+ } |
+ nw = xmlAddChild(ctxt->node, nw); |
+ } |
+ if (cur == ent->last) { |
/* |
- * when operating on a reader, the entities definitions |
- * are always owning the entities subtree. |
- if (ctxt->parseMode == XML_PARSE_READER) |
- ent->owner = 1; |
+ * needed to detect some strange empty |
+ * node cases in the reader tests |
*/ |
+ if ((ctxt->parseMode == XML_PARSE_READER) && |
+ (nw != NULL) && |
+ (nw->type == XML_ELEMENT_NODE) && |
+ (nw->children == NULL)) |
+ nw->extra = 1; |
- cur = ent->children; |
- while (cur != NULL) { |
- nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); |
- if (nw != NULL) { |
- if (nw->_private == NULL) |
- nw->_private = cur->_private; |
- if (firstChild == NULL){ |
- firstChild = nw; |
- } |
- nw = xmlAddChild(ctxt->node, nw); |
- } |
- if (cur == ent->last) { |
- /* |
- * needed to detect some strange empty |
- * node cases in the reader tests |
- */ |
- if ((ctxt->parseMode == XML_PARSE_READER) && |
- (nw != NULL) && |
- (nw->type == XML_ELEMENT_NODE) && |
- (nw->children == NULL)) |
- nw->extra = 1; |
- |
- break; |
- } |
- cur = cur->next; |
- } |
+ break; |
+ } |
+ cur = cur->next; |
+ } |
#ifdef LIBXML_LEGACY_ENABLED |
- if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) |
- xmlAddEntityReference(ent, firstChild, nw); |
+ if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) |
+ xmlAddEntityReference(ent, firstChild, nw); |
#endif /* LIBXML_LEGACY_ENABLED */ |
- } else if (list == NULL) { |
- xmlNodePtr nw = NULL, cur, next, last, |
- firstChild = NULL; |
- /* |
- * Copy the entity child list and make it the new |
- * entity child list. The goal is to make sure any |
- * ID or REF referenced will be the one from the |
- * document content and not the entity copy. |
- */ |
- cur = ent->children; |
- ent->children = NULL; |
- last = ent->last; |
- ent->last = NULL; |
- while (cur != NULL) { |
- next = cur->next; |
- cur->next = NULL; |
- cur->parent = NULL; |
- nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); |
- if (nw != NULL) { |
- if (nw->_private == NULL) |
- nw->_private = cur->_private; |
- if (firstChild == NULL){ |
- firstChild = cur; |
- } |
- xmlAddChild((xmlNodePtr) ent, nw); |
- xmlAddChild(ctxt->node, cur); |
- } |
- if (cur == last) |
- break; |
- cur = next; |
+ } else if (list == NULL) { |
+ xmlNodePtr nw = NULL, cur, next, last, |
+ firstChild = NULL; |
+ /* |
+ * Copy the entity child list and make it the new |
+ * entity child list. The goal is to make sure any |
+ * ID or REF referenced will be the one from the |
+ * document content and not the entity copy. |
+ */ |
+ cur = ent->children; |
+ ent->children = NULL; |
+ last = ent->last; |
+ ent->last = NULL; |
+ while (cur != NULL) { |
+ next = cur->next; |
+ cur->next = NULL; |
+ cur->parent = NULL; |
+ nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); |
+ if (nw != NULL) { |
+ if (nw->_private == NULL) |
+ nw->_private = cur->_private; |
+ if (firstChild == NULL){ |
+ firstChild = cur; |
} |
- ent->owner = 1; |
+ xmlAddChild((xmlNodePtr) ent, nw); |
+ xmlAddChild(ctxt->node, cur); |
+ } |
+ if (cur == last) |
+ break; |
+ cur = next; |
+ } |
+ if (ent->owner == 0) |
+ ent->owner = 1; |
#ifdef LIBXML_LEGACY_ENABLED |
- if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) |
- xmlAddEntityReference(ent, firstChild, nw); |
+ if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) |
+ xmlAddEntityReference(ent, firstChild, nw); |
#endif /* LIBXML_LEGACY_ENABLED */ |
- } else { |
- const xmlChar *nbktext; |
- |
- /* |
- * the name change is to avoid coalescing of the |
- * node with a possible previous text one which |
- * would make ent->children a dangling pointer |
- */ |
- nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", |
- -1); |
- if (ent->children->type == XML_TEXT_NODE) |
- ent->children->name = nbktext; |
- if ((ent->last != ent->children) && |
- (ent->last->type == XML_TEXT_NODE)) |
- ent->last->name = nbktext; |
- xmlAddChildList(ctxt->node, ent->children); |
- } |
+ } else { |
+ const xmlChar *nbktext; |
- /* |
- * This is to avoid a nasty side effect, see |
- * characters() in SAX.c |
- */ |
- ctxt->nodemem = 0; |
- ctxt->nodelen = 0; |
- return; |
- } |
+ /* |
+ * the name change is to avoid coalescing of the |
+ * node with a possible previous text one which |
+ * would make ent->children a dangling pointer |
+ */ |
+ nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", |
+ -1); |
+ if (ent->children->type == XML_TEXT_NODE) |
+ ent->children->name = nbktext; |
+ if ((ent->last != ent->children) && |
+ (ent->last->type == XML_TEXT_NODE)) |
+ ent->last->name = nbktext; |
+ xmlAddChildList(ctxt->node, ent->children); |
} |
- } else { |
- val = ent->content; |
- if (val == NULL) return; |
+ |
/* |
- * inline the entity. |
+ * This is to avoid a nasty side effect, see |
+ * characters() in SAX.c |
*/ |
- if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && |
- (!ctxt->disableSAX)) |
- ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); |
+ ctxt->nodemem = 0; |
+ ctxt->nodelen = 0; |
+ return; |
} |
} |
} |
@@ -6597,132 +7125,146 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { |
xmlEntityPtr ent = NULL; |
GROW; |
- |
- if (RAW == '&') { |
- NEXT; |
- name = xmlParseName(ctxt); |
- if (name == NULL) { |
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
- "xmlParseEntityRef: no name\n"); |
- } else { |
- if (RAW == ';') { |
- NEXT; |
- /* |
- * Ask first SAX for entity resolution, otherwise try the |
- * predefined set. |
- */ |
- if (ctxt->sax != NULL) { |
- if (ctxt->sax->getEntity != NULL) |
- ent = ctxt->sax->getEntity(ctxt->userData, name); |
- if ((ctxt->wellFormed == 1 ) && (ent == NULL)) |
- ent = xmlGetPredefinedEntity(name); |
- if ((ctxt->wellFormed == 1 ) && (ent == NULL) && |
- (ctxt->userData==ctxt)) { |
- ent = xmlSAX2GetEntity(ctxt, name); |
- } |
- } |
- /* |
- * [ WFC: Entity Declared ] |
- * In a document without any DTD, a document with only an |
- * internal DTD subset which contains no parameter entity |
- * references, or a document with "standalone='yes'", the |
- * Name given in the entity reference must match that in an |
- * entity declaration, except that well-formed documents |
- * need not declare any of the following entities: amp, lt, |
- * gt, apos, quot. |
- * The declaration of a parameter entity must precede any |
- * reference to it. |
- * Similarly, the declaration of a general entity must |
- * precede any reference to it which appears in a default |
- * value in an attribute-list declaration. Note that if |
- * entities are declared in the external subset or in |
- * external parameter entities, a non-validating processor |
- * is not obligated to read and process their declarations; |
- * for such documents, the rule that an entity must be |
- * declared is a well-formedness constraint only if |
- * standalone='yes'. |
- */ |
- if (ent == NULL) { |
- if ((ctxt->standalone == 1) || |
- ((ctxt->hasExternalSubset == 0) && |
- (ctxt->hasPErefs == 0))) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
- "Entity '%s' not defined\n", name); |
- } else { |
- xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, |
- "Entity '%s' not defined\n", name); |
- if ((ctxt->inSubset == 0) && |
- (ctxt->sax != NULL) && |
- (ctxt->sax->reference != NULL)) { |
- ctxt->sax->reference(ctxt->userData, name); |
- } |
- } |
- ctxt->valid = 0; |
- } |
- /* |
- * [ WFC: Parsed Entity ] |
- * An entity reference must not contain the name of an |
- * unparsed entity |
- */ |
- else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, |
- "Entity reference to unparsed entity %s\n", name); |
- } |
- |
- /* |
- * [ WFC: No External Entity References ] |
- * Attribute values cannot contain direct or indirect |
- * entity references to external entities. |
- */ |
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
- (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, |
- "Attribute references external entity '%s'\n", name); |
- } |
- /* |
- * [ WFC: No < in Attribute Values ] |
- * The replacement text of any entity referred to directly or |
- * indirectly in an attribute value (other than "<") must |
- * not contain a <. |
- */ |
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
- (ent != NULL) && |
- (!xmlStrEqual(ent->name, BAD_CAST "lt")) && |
- (ent->content != NULL) && |
- (xmlStrchr(ent->content, '<'))) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, |
- "'<' in entity '%s' is not allowed in attributes values\n", name); |
- } |
+ if (RAW != '&') |
+ return(NULL); |
+ NEXT; |
+ name = xmlParseName(ctxt); |
+ if (name == NULL) { |
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
+ "xmlParseEntityRef: no name\n"); |
+ return(NULL); |
+ } |
+ if (RAW != ';') { |
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
+ return(NULL); |
+ } |
+ NEXT; |
- /* |
- * Internal check, no parameter entities here ... |
- */ |
- else { |
- switch (ent->etype) { |
- case XML_INTERNAL_PARAMETER_ENTITY: |
- case XML_EXTERNAL_PARAMETER_ENTITY: |
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, |
- "Attempt to reference the parameter entity '%s'\n", |
- name); |
- break; |
- default: |
- break; |
- } |
- } |
+ /* |
+ * Predefined entities override any extra definition |
+ */ |
+ if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { |
+ ent = xmlGetPredefinedEntity(name); |
+ if (ent != NULL) |
+ return(ent); |
+ } |
- /* |
- * [ WFC: No Recursion ] |
- * A parsed entity must not contain a recursive reference |
- * to itself, either directly or indirectly. |
- * Done somewhere else |
- */ |
+ /* |
+ * Increase the number of entity references parsed |
+ */ |
+ ctxt->nbentities++; |
- } else { |
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
+ /* |
+ * Ask first SAX for entity resolution, otherwise try the |
+ * entities which may have been stored in the parser context. |
+ */ |
+ if (ctxt->sax != NULL) { |
+ if (ctxt->sax->getEntity != NULL) |
+ ent = ctxt->sax->getEntity(ctxt->userData, name); |
+ if ((ctxt->wellFormed == 1 ) && (ent == NULL) && |
+ (ctxt->options & XML_PARSE_OLDSAX)) |
+ ent = xmlGetPredefinedEntity(name); |
+ if ((ctxt->wellFormed == 1 ) && (ent == NULL) && |
+ (ctxt->userData==ctxt)) { |
+ ent = xmlSAX2GetEntity(ctxt, name); |
+ } |
+ } |
+ /* |
+ * [ WFC: Entity Declared ] |
+ * In a document without any DTD, a document with only an |
+ * internal DTD subset which contains no parameter entity |
+ * references, or a document with "standalone='yes'", the |
+ * Name given in the entity reference must match that in an |
+ * entity declaration, except that well-formed documents |
+ * need not declare any of the following entities: amp, lt, |
+ * gt, apos, quot. |
+ * The declaration of a parameter entity must precede any |
+ * reference to it. |
+ * Similarly, the declaration of a general entity must |
+ * precede any reference to it which appears in a default |
+ * value in an attribute-list declaration. Note that if |
+ * entities are declared in the external subset or in |
+ * external parameter entities, a non-validating processor |
+ * is not obligated to read and process their declarations; |
+ * for such documents, the rule that an entity must be |
+ * declared is a well-formedness constraint only if |
+ * standalone='yes'. |
+ */ |
+ if (ent == NULL) { |
+ if ((ctxt->standalone == 1) || |
+ ((ctxt->hasExternalSubset == 0) && |
+ (ctxt->hasPErefs == 0))) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
+ "Entity '%s' not defined\n", name); |
+ } else { |
+ xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, |
+ "Entity '%s' not defined\n", name); |
+ if ((ctxt->inSubset == 0) && |
+ (ctxt->sax != NULL) && |
+ (ctxt->sax->reference != NULL)) { |
+ ctxt->sax->reference(ctxt->userData, name); |
} |
} |
+ ctxt->valid = 0; |
+ } |
+ |
+ /* |
+ * [ WFC: Parsed Entity ] |
+ * An entity reference must not contain the name of an |
+ * unparsed entity |
+ */ |
+ else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, |
+ "Entity reference to unparsed entity %s\n", name); |
+ } |
+ |
+ /* |
+ * [ WFC: No External Entity References ] |
+ * Attribute values cannot contain direct or indirect |
+ * entity references to external entities. |
+ */ |
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
+ (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, |
+ "Attribute references external entity '%s'\n", name); |
+ } |
+ /* |
+ * [ WFC: No < in Attribute Values ] |
+ * The replacement text of any entity referred to directly or |
+ * indirectly in an attribute value (other than "<") must |
+ * not contain a <. |
+ */ |
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
+ (ent != NULL) && (ent->content != NULL) && |
+ (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && |
+ (xmlStrchr(ent->content, '<'))) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, |
+ "'<' in entity '%s' is not allowed in attributes values\n", name); |
+ } |
+ |
+ /* |
+ * Internal check, no parameter entities here ... |
+ */ |
+ else { |
+ switch (ent->etype) { |
+ case XML_INTERNAL_PARAMETER_ENTITY: |
+ case XML_EXTERNAL_PARAMETER_ENTITY: |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, |
+ "Attempt to reference the parameter entity '%s'\n", |
+ name); |
+ break; |
+ default: |
+ break; |
+ } |
} |
+ |
+ /* |
+ * [ WFC: No Recursion ] |
+ * A parsed entity must not contain a recursive reference |
+ * to itself, either directly or indirectly. |
+ * Done somewhere else |
+ */ |
return(ent); |
} |
@@ -6757,7 +7299,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { |
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer |
* is updated to the current location in the string. |
*/ |
-xmlEntityPtr |
+static xmlEntityPtr |
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { |
xmlChar *name; |
const xmlChar *ptr; |
@@ -6768,129 +7310,151 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { |
return(NULL); |
ptr = *str; |
cur = *ptr; |
- if (cur == '&') { |
- ptr++; |
- cur = *ptr; |
- name = xmlParseStringName(ctxt, &ptr); |
- if (name == NULL) { |
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
- "xmlParseStringEntityRef: no name\n"); |
- } else { |
- if (*ptr == ';') { |
- ptr++; |
- /* |
- * Ask first SAX for entity resolution, otherwise try the |
- * predefined set. |
- */ |
- if (ctxt->sax != NULL) { |
- if (ctxt->sax->getEntity != NULL) |
- ent = ctxt->sax->getEntity(ctxt->userData, name); |
- if (ent == NULL) |
- ent = xmlGetPredefinedEntity(name); |
- if ((ent == NULL) && (ctxt->userData==ctxt)) { |
- ent = xmlSAX2GetEntity(ctxt, name); |
- } |
- } |
- /* |
- * [ WFC: Entity Declared ] |
- * In a document without any DTD, a document with only an |
- * internal DTD subset which contains no parameter entity |
- * references, or a document with "standalone='yes'", the |
- * Name given in the entity reference must match that in an |
- * entity declaration, except that well-formed documents |
- * need not declare any of the following entities: amp, lt, |
- * gt, apos, quot. |
- * The declaration of a parameter entity must precede any |
- * reference to it. |
- * Similarly, the declaration of a general entity must |
- * precede any reference to it which appears in a default |
- * value in an attribute-list declaration. Note that if |
- * entities are declared in the external subset or in |
- * external parameter entities, a non-validating processor |
- * is not obligated to read and process their declarations; |
- * for such documents, the rule that an entity must be |
- * declared is a well-formedness constraint only if |
- * standalone='yes'. |
- */ |
- if (ent == NULL) { |
- if ((ctxt->standalone == 1) || |
- ((ctxt->hasExternalSubset == 0) && |
- (ctxt->hasPErefs == 0))) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
- "Entity '%s' not defined\n", name); |
- } else { |
- xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, |
- "Entity '%s' not defined\n", |
- name); |
- } |
- /* TODO ? check regressions ctxt->valid = 0; */ |
- } |
+ if (cur != '&') |
+ return(NULL); |
- /* |
- * [ WFC: Parsed Entity ] |
- * An entity reference must not contain the name of an |
- * unparsed entity |
- */ |
- else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, |
- "Entity reference to unparsed entity %s\n", name); |
- } |
+ ptr++; |
+ name = xmlParseStringName(ctxt, &ptr); |
+ if (name == NULL) { |
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
+ "xmlParseStringEntityRef: no name\n"); |
+ *str = ptr; |
+ return(NULL); |
+ } |
+ if (*ptr != ';') { |
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
+ xmlFree(name); |
+ *str = ptr; |
+ return(NULL); |
+ } |
+ ptr++; |
- /* |
- * [ WFC: No External Entity References ] |
- * Attribute values cannot contain direct or indirect |
- * entity references to external entities. |
- */ |
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
- (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, |
- "Attribute references external entity '%s'\n", name); |
- } |
- /* |
- * [ WFC: No < in Attribute Values ] |
- * The replacement text of any entity referred to directly or |
- * indirectly in an attribute value (other than "<") must |
- * not contain a <. |
- */ |
- else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
- (ent != NULL) && |
- (!xmlStrEqual(ent->name, BAD_CAST "lt")) && |
- (ent->content != NULL) && |
- (xmlStrchr(ent->content, '<'))) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, |
- "'<' in entity '%s' is not allowed in attributes values\n", |
- name); |
- } |
- /* |
- * Internal check, no parameter entities here ... |
- */ |
- else { |
- switch (ent->etype) { |
- case XML_INTERNAL_PARAMETER_ENTITY: |
- case XML_EXTERNAL_PARAMETER_ENTITY: |
- xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, |
- "Attempt to reference the parameter entity '%s'\n", |
- name); |
- break; |
- default: |
- break; |
- } |
- } |
+ /* |
+ * Predefined entities override any extra definition |
+ */ |
+ if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { |
+ ent = xmlGetPredefinedEntity(name); |
+ if (ent != NULL) { |
+ xmlFree(name); |
+ *str = ptr; |
+ return(ent); |
+ } |
+ } |
- /* |
- * [ WFC: No Recursion ] |
- * A parsed entity must not contain a recursive reference |
- * to itself, either directly or indirectly. |
- * Done somewhere else |
- */ |
+ /* |
+ * Increase the number of entity references parsed |
+ */ |
+ ctxt->nbentities++; |
- } else { |
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
- } |
- xmlFree(name); |
+ /* |
+ * Ask first SAX for entity resolution, otherwise try the |
+ * entities which may have been stored in the parser context. |
+ */ |
+ if (ctxt->sax != NULL) { |
+ if (ctxt->sax->getEntity != NULL) |
+ ent = ctxt->sax->getEntity(ctxt->userData, name); |
+ if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) |
+ ent = xmlGetPredefinedEntity(name); |
+ if ((ent == NULL) && (ctxt->userData==ctxt)) { |
+ ent = xmlSAX2GetEntity(ctxt, name); |
} |
} |
+ |
+ /* |
+ * [ WFC: Entity Declared ] |
+ * In a document without any DTD, a document with only an |
+ * internal DTD subset which contains no parameter entity |
+ * references, or a document with "standalone='yes'", the |
+ * Name given in the entity reference must match that in an |
+ * entity declaration, except that well-formed documents |
+ * need not declare any of the following entities: amp, lt, |
+ * gt, apos, quot. |
+ * The declaration of a parameter entity must precede any |
+ * reference to it. |
+ * Similarly, the declaration of a general entity must |
+ * precede any reference to it which appears in a default |
+ * value in an attribute-list declaration. Note that if |
+ * entities are declared in the external subset or in |
+ * external parameter entities, a non-validating processor |
+ * is not obligated to read and process their declarations; |
+ * for such documents, the rule that an entity must be |
+ * declared is a well-formedness constraint only if |
+ * standalone='yes'. |
+ */ |
+ if (ent == NULL) { |
+ if ((ctxt->standalone == 1) || |
+ ((ctxt->hasExternalSubset == 0) && |
+ (ctxt->hasPErefs == 0))) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
+ "Entity '%s' not defined\n", name); |
+ } else { |
+ xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, |
+ "Entity '%s' not defined\n", |
+ name); |
+ } |
+ /* TODO ? check regressions ctxt->valid = 0; */ |
+ } |
+ |
+ /* |
+ * [ WFC: Parsed Entity ] |
+ * An entity reference must not contain the name of an |
+ * unparsed entity |
+ */ |
+ else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, |
+ "Entity reference to unparsed entity %s\n", name); |
+ } |
+ |
+ /* |
+ * [ WFC: No External Entity References ] |
+ * Attribute values cannot contain direct or indirect |
+ * entity references to external entities. |
+ */ |
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
+ (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, |
+ "Attribute references external entity '%s'\n", name); |
+ } |
+ /* |
+ * [ WFC: No < in Attribute Values ] |
+ * The replacement text of any entity referred to directly or |
+ * indirectly in an attribute value (other than "<") must |
+ * not contain a <. |
+ */ |
+ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && |
+ (ent != NULL) && (ent->content != NULL) && |
+ (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && |
+ (xmlStrchr(ent->content, '<'))) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, |
+ "'<' in entity '%s' is not allowed in attributes values\n", |
+ name); |
+ } |
+ |
+ /* |
+ * Internal check, no parameter entities here ... |
+ */ |
+ else { |
+ switch (ent->etype) { |
+ case XML_INTERNAL_PARAMETER_ENTITY: |
+ case XML_EXTERNAL_PARAMETER_ENTITY: |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, |
+ "Attempt to reference the parameter entity '%s'\n", |
+ name); |
+ break; |
+ default: |
+ break; |
+ } |
+ } |
+ |
+ /* |
+ * [ WFC: No Recursion ] |
+ * A parsed entity must not contain a recursive reference |
+ * to itself, either directly or indirectly. |
+ * Done somewhere else |
+ */ |
+ |
+ xmlFree(name); |
*str = ptr; |
return(ent); |
} |
@@ -6931,90 +7495,101 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) |
xmlEntityPtr entity = NULL; |
xmlParserInputPtr input; |
- if (RAW == '%') { |
- NEXT; |
- name = xmlParseName(ctxt); |
- if (name == NULL) { |
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
- "xmlParsePEReference: no name\n"); |
- } else { |
- if (RAW == ';') { |
- NEXT; |
- if ((ctxt->sax != NULL) && |
- (ctxt->sax->getParameterEntity != NULL)) |
- entity = ctxt->sax->getParameterEntity(ctxt->userData, |
- name); |
- if (entity == NULL) { |
- /* |
- * [ WFC: Entity Declared ] |
- * In a document without any DTD, a document with only an |
- * internal DTD subset which contains no parameter entity |
- * references, or a document with "standalone='yes'", ... |
- * ... The declaration of a parameter entity must precede |
- * any reference to it... |
- */ |
- if ((ctxt->standalone == 1) || |
- ((ctxt->hasExternalSubset == 0) && |
- (ctxt->hasPErefs == 0))) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
- "PEReference: %%%s; not found\n", |
- name); |
- } else { |
- /* |
- * [ VC: Entity Declared ] |
- * In a document with an external subset or external |
- * parameter entities with "standalone='no'", ... |
- * ... The declaration of a parameter entity must |
- * precede any reference to it... |
- */ |
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
- "PEReference: %%%s; not found\n", |
- name, NULL); |
- ctxt->valid = 0; |
- } |
- } else { |
- /* |
- * Internal checking in case the entity quest barfed |
- */ |
- if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && |
- (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { |
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
- "Internal: %%%s; is not a parameter entity\n", |
- name, NULL); |
- } else if (ctxt->input->free != deallocblankswrapper) { |
- input = |
- xmlNewBlanksWrapperInputStream(ctxt, entity); |
- xmlPushInput(ctxt, input); |
- } else { |
- /* |
- * TODO !!! |
- * handle the extra spaces added before and after |
- * c.f. http://www.w3.org/TR/REC-xml#as-PE |
- */ |
- input = xmlNewEntityInputStream(ctxt, entity); |
- xmlPushInput(ctxt, input); |
- if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
- (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && |
- (IS_BLANK_CH(NXT(5)))) { |
- xmlParseTextDecl(ctxt); |
- if (ctxt->errNo == |
- XML_ERR_UNSUPPORTED_ENCODING) { |
- /* |
- * The XML REC instructs us to stop parsing |
- * right here |
- */ |
- ctxt->instate = XML_PARSER_EOF; |
- return; |
- } |
- } |
- } |
- } |
- ctxt->hasPErefs = 1; |
- } else { |
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
- } |
- } |
+ if (RAW != '%') |
+ return; |
+ NEXT; |
+ name = xmlParseName(ctxt); |
+ if (name == NULL) { |
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
+ "xmlParsePEReference: no name\n"); |
+ return; |
+ } |
+ if (RAW != ';') { |
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
+ return; |
} |
+ |
+ NEXT; |
+ |
+ /* |
+ * Increase the number of entity references parsed |
+ */ |
+ ctxt->nbentities++; |
+ |
+ /* |
+ * Request the entity from SAX |
+ */ |
+ if ((ctxt->sax != NULL) && |
+ (ctxt->sax->getParameterEntity != NULL)) |
+ entity = ctxt->sax->getParameterEntity(ctxt->userData, |
+ name); |
+ if (entity == NULL) { |
+ /* |
+ * [ WFC: Entity Declared ] |
+ * In a document without any DTD, a document with only an |
+ * internal DTD subset which contains no parameter entity |
+ * references, or a document with "standalone='yes'", ... |
+ * ... The declaration of a parameter entity must precede |
+ * any reference to it... |
+ */ |
+ if ((ctxt->standalone == 1) || |
+ ((ctxt->hasExternalSubset == 0) && |
+ (ctxt->hasPErefs == 0))) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
+ "PEReference: %%%s; not found\n", |
+ name); |
+ } else { |
+ /* |
+ * [ VC: Entity Declared ] |
+ * In a document with an external subset or external |
+ * parameter entities with "standalone='no'", ... |
+ * ... The declaration of a parameter entity must |
+ * precede any reference to it... |
+ */ |
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
+ "PEReference: %%%s; not found\n", |
+ name, NULL); |
+ ctxt->valid = 0; |
+ } |
+ } else { |
+ /* |
+ * Internal checking in case the entity quest barfed |
+ */ |
+ if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && |
+ (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { |
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
+ "Internal: %%%s; is not a parameter entity\n", |
+ name, NULL); |
+ } else if (ctxt->input->free != deallocblankswrapper) { |
+ input = xmlNewBlanksWrapperInputStream(ctxt, entity); |
+ if (xmlPushInput(ctxt, input) < 0) |
+ return; |
+ } else { |
+ /* |
+ * TODO !!! |
+ * handle the extra spaces added before and after |
+ * c.f. http://www.w3.org/TR/REC-xml#as-PE |
+ */ |
+ input = xmlNewEntityInputStream(ctxt, entity); |
+ if (xmlPushInput(ctxt, input) < 0) |
+ return; |
+ if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
+ (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && |
+ (IS_BLANK_CH(NXT(5)))) { |
+ xmlParseTextDecl(ctxt); |
+ if (ctxt->errNo == |
+ XML_ERR_UNSUPPORTED_ENCODING) { |
+ /* |
+ * The XML REC instructs us to stop parsing |
+ * right here |
+ */ |
+ ctxt->instate = XML_PARSER_EOF; |
+ return; |
+ } |
+ } |
+ } |
+ } |
+ ctxt->hasPErefs = 1; |
} |
/** |
@@ -7067,7 +7642,11 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { |
* Push the entity as the current input, read char by char |
* saving to the buffer until the end of the entity or an error |
*/ |
- xmlPushInput(ctxt, input); |
+ if (xmlPushInput(ctxt, input) < 0) { |
+ xmlBufferFree(buf); |
+ return(-1); |
+ } |
+ |
GROW; |
c = CUR_CHAR(l); |
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && |
@@ -7128,7 +7707,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { |
* Returns the string of the entity content. |
* str is updated to the current value of the index |
*/ |
-xmlEntityPtr |
+static xmlEntityPtr |
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { |
const xmlChar *ptr; |
xmlChar cur; |
@@ -7138,67 +7717,76 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { |
if ((str == NULL) || (*str == NULL)) return(NULL); |
ptr = *str; |
cur = *ptr; |
- if (cur == '%') { |
- ptr++; |
- cur = *ptr; |
- name = xmlParseStringName(ctxt, &ptr); |
- if (name == NULL) { |
- xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
- "xmlParseStringPEReference: no name\n"); |
+ if (cur != '%') |
+ return(NULL); |
+ ptr++; |
+ name = xmlParseStringName(ctxt, &ptr); |
+ if (name == NULL) { |
+ xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, |
+ "xmlParseStringPEReference: no name\n"); |
+ *str = ptr; |
+ return(NULL); |
+ } |
+ cur = *ptr; |
+ if (cur != ';') { |
+ xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
+ xmlFree(name); |
+ *str = ptr; |
+ return(NULL); |
+ } |
+ ptr++; |
+ |
+ /* |
+ * Increase the number of entity references parsed |
+ */ |
+ ctxt->nbentities++; |
+ |
+ /* |
+ * Request the entity from SAX |
+ */ |
+ if ((ctxt->sax != NULL) && |
+ (ctxt->sax->getParameterEntity != NULL)) |
+ entity = ctxt->sax->getParameterEntity(ctxt->userData, |
+ name); |
+ if (entity == NULL) { |
+ /* |
+ * [ WFC: Entity Declared ] |
+ * In a document without any DTD, a document with only an |
+ * internal DTD subset which contains no parameter entity |
+ * references, or a document with "standalone='yes'", ... |
+ * ... The declaration of a parameter entity must precede |
+ * any reference to it... |
+ */ |
+ if ((ctxt->standalone == 1) || |
+ ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
+ "PEReference: %%%s; not found\n", name); |
} else { |
- cur = *ptr; |
- if (cur == ';') { |
- ptr++; |
- cur = *ptr; |
- if ((ctxt->sax != NULL) && |
- (ctxt->sax->getParameterEntity != NULL)) |
- entity = ctxt->sax->getParameterEntity(ctxt->userData, |
- name); |
- if (entity == NULL) { |
- /* |
- * [ WFC: Entity Declared ] |
- * In a document without any DTD, a document with only an |
- * internal DTD subset which contains no parameter entity |
- * references, or a document with "standalone='yes'", ... |
- * ... The declaration of a parameter entity must precede |
- * any reference to it... |
- */ |
- if ((ctxt->standalone == 1) || |
- ((ctxt->hasExternalSubset == 0) && |
- (ctxt->hasPErefs == 0))) { |
- xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
- "PEReference: %%%s; not found\n", name); |
- } else { |
- /* |
- * [ VC: Entity Declared ] |
- * In a document with an external subset or external |
- * parameter entities with "standalone='no'", ... |
- * ... The declaration of a parameter entity must |
- * precede any reference to it... |
- */ |
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
- "PEReference: %%%s; not found\n", |
- name, NULL); |
- ctxt->valid = 0; |
- } |
- } else { |
- /* |
- * Internal checking in case the entity quest barfed |
- */ |
- if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && |
- (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { |
- xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
- "%%%s; is not a parameter entity\n", |
- name, NULL); |
- } |
- } |
- ctxt->hasPErefs = 1; |
- } else { |
- xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); |
- } |
- xmlFree(name); |
+ /* |
+ * [ VC: Entity Declared ] |
+ * In a document with an external subset or external |
+ * parameter entities with "standalone='no'", ... |
+ * ... The declaration of a parameter entity must |
+ * precede any reference to it... |
+ */ |
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
+ "PEReference: %%%s; not found\n", |
+ name, NULL); |
+ ctxt->valid = 0; |
+ } |
+ } else { |
+ /* |
+ * Internal checking in case the entity quest barfed |
+ */ |
+ if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && |
+ (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { |
+ xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
+ "%%%s; is not a parameter entity\n", |
+ name, NULL); |
} |
} |
+ ctxt->hasPErefs = 1; |
+ xmlFree(name); |
*str = ptr; |
return(entity); |
} |
@@ -7675,38 +8263,6 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) { |
* * |
************************************************************************/ |
-static const xmlChar * |
-xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { |
- int len = 0, l; |
- int c; |
- int count = 0; |
- |
- /* |
- * Handler for more complex cases |
- */ |
- GROW; |
- c = CUR_CHAR(l); |
- if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
- (!IS_LETTER(c) && (c != '_'))) { |
- return(NULL); |
- } |
- |
- while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
- ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
- (c == '.') || (c == '-') || (c == '_') || |
- (IS_COMBINING(c)) || |
- (IS_EXTENDER(c)))) { |
- if (count++ > 100) { |
- count = 0; |
- GROW; |
- } |
- len += l; |
- NEXTL(l); |
- c = CUR_CHAR(l); |
- } |
- return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
-} |
- |
/* |
* xmlGetNamespace: |
* @ctxt: an XML parser context |
@@ -7728,61 +8284,10 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { |
return(NULL); |
return(ctxt->nsTab[i + 1]); |
} |
- if (ctxt->nsParent) return xmlGetNamespace(ctxt->nsParent, prefix); |
return(NULL); |
} |
/** |
- * xmlParseNCName: |
- * @ctxt: an XML parser context |
- * @len: lenght of the string parsed |
- * |
- * parse an XML name. |
- * |
- * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | |
- * CombiningChar | Extender |
- * |
- * [5NS] NCName ::= (Letter | '_') (NCNameChar)* |
- * |
- * Returns the Name parsed or NULL |
- */ |
- |
-static const xmlChar * |
-xmlParseNCName(xmlParserCtxtPtr ctxt) { |
- const xmlChar *in; |
- const xmlChar *ret; |
- int count = 0; |
- |
- /* |
- * Accelerator for simple ASCII names |
- */ |
- in = ctxt->input->cur; |
- if (((*in >= 0x61) && (*in <= 0x7A)) || |
- ((*in >= 0x41) && (*in <= 0x5A)) || |
- (*in == '_')) { |
- in++; |
- while (((*in >= 0x61) && (*in <= 0x7A)) || |
- ((*in >= 0x41) && (*in <= 0x5A)) || |
- ((*in >= 0x30) && (*in <= 0x39)) || |
- (*in == '_') || (*in == '-') || |
- (*in == '.')) |
- in++; |
- if ((*in > 0) && (*in < 0x80)) { |
- count = in - ctxt->input->cur; |
- ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); |
- ctxt->input->cur = in; |
- ctxt->nbChars += count; |
- ctxt->input->col += count; |
- if (ret == NULL) { |
- xmlErrMemory(ctxt, NULL); |
- } |
- return(ret); |
- } |
- } |
- return(xmlParseNCNameComplex(ctxt)); |
-} |
- |
-/** |
* xmlParseQName: |
* @ctxt: an XML parser context |
* @prefix: pointer to store the prefix part |
@@ -7824,7 +8329,13 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { |
xmlNsErr(ctxt, XML_NS_ERR_QNAME, |
"Failed to parse QName '%s:'\n", p, NULL, NULL); |
- tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); |
+ l = xmlParseNmtoken(ctxt); |
+ if (l == NULL) |
+ tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); |
+ else { |
+ tmp = xmlBuildQName(l, p, NULL, 0); |
+ xmlFree((char *)l); |
+ } |
p = xmlDictLookup(ctxt->dict, tmp, -1); |
if (tmp != NULL) xmlFree(tmp); |
*prefix = NULL; |
@@ -7872,7 +8383,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { |
static const xmlChar * |
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, |
xmlChar const *prefix) { |
- const xmlChar *cmp = name; |
+ const xmlChar *cmp; |
const xmlChar *in; |
const xmlChar *ret; |
const xmlChar *prefix2; |
@@ -7881,7 +8392,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, |
GROW; |
in = ctxt->input->cur; |
- |
+ |
cmp = prefix; |
while (*in != 0 && *in == *cmp) { |
++in; |
@@ -8132,7 +8643,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, |
const xmlChar *val2; |
val2 = xmlAttrNormalizeSpace2(ctxt, val, len); |
- if (val2 != NULL) { |
+ if ((val2 != NULL) && (val2 != val)) { |
xmlFree(val); |
val = (xmlChar *) val2; |
} |
@@ -8295,17 +8806,33 @@ reparse: |
if (*URL != 0) { |
uri = xmlParseURI((const char *) URL); |
if (uri == NULL) { |
- xmlWarningMsg(ctxt, XML_WAR_NS_URI, |
- "xmlns: %s not a valid URI\n", |
- URL, NULL); |
+ xmlNsErr(ctxt, XML_WAR_NS_URI, |
+ "xmlns: '%s' is not a valid URI\n", |
+ URL, NULL, NULL); |
} else { |
if (uri->scheme == NULL) { |
- xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, |
- "xmlns: URI %s is not absolute\n", |
- URL, NULL); |
+ xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, |
+ "xmlns: URI %s is not absolute\n", |
+ URL, NULL, NULL); |
} |
xmlFreeURI(uri); |
} |
+ if (URL == ctxt->str_xml_ns) { |
+ if (attname != ctxt->str_xml) { |
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, |
+ "xml namespace URI cannot be the default namespace\n", |
+ NULL, NULL, NULL); |
+ } |
+ goto skip_default_ns; |
+ } |
+ if ((len == 29) && |
+ (xmlStrEqual(URL, |
+ BAD_CAST "http://www.w3.org/2000/xmlns/"))) { |
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, |
+ "reuse of the xmlns namespace name is forbidden\n", |
+ NULL, NULL, NULL); |
+ goto skip_default_ns; |
+ } |
} |
/* |
* check that it's not a defined namespace |
@@ -8317,6 +8844,7 @@ reparse: |
xmlErrAttributeDup(ctxt, NULL, attname); |
else |
if (nsPush(ctxt, NULL, URL) > 0) nbNs++; |
+skip_default_ns: |
if (alloc != 0) xmlFree(attvalue); |
SKIP_BLANKS; |
continue; |
@@ -8334,22 +8862,49 @@ reparse: |
/* |
* Do not keep a namespace definition node |
*/ |
- if (alloc != 0) xmlFree(attvalue); |
- SKIP_BLANKS; |
- continue; |
+ goto skip_ns; |
+ } |
+ if (URL == ctxt->str_xml_ns) { |
+ if (attname != ctxt->str_xml) { |
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, |
+ "xml namespace URI mapped to wrong prefix\n", |
+ NULL, NULL, NULL); |
+ } |
+ goto skip_ns; |
+ } |
+ if (attname == ctxt->str_xmlns) { |
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, |
+ "redefinition of the xmlns prefix is forbidden\n", |
+ NULL, NULL, NULL); |
+ goto skip_ns; |
} |
- uri = xmlParseURI((const char *) URL); |
- if (uri == NULL) { |
- xmlWarningMsg(ctxt, XML_WAR_NS_URI, |
- "xmlns:%s: '%s' is not a valid URI\n", |
- attname, URL); |
+ if ((len == 29) && |
+ (xmlStrEqual(URL, |
+ BAD_CAST "http://www.w3.org/2000/xmlns/"))) { |
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, |
+ "reuse of the xmlns namespace name is forbidden\n", |
+ NULL, NULL, NULL); |
+ goto skip_ns; |
+ } |
+ if ((URL == NULL) || (URL[0] == 0)) { |
+ xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, |
+ "xmlns:%s: Empty XML namespace is not allowed\n", |
+ attname, NULL, NULL); |
+ goto skip_ns; |
} else { |
- if ((ctxt->pedantic) && (uri->scheme == NULL)) { |
- xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, |
+ uri = xmlParseURI((const char *) URL); |
+ if (uri == NULL) { |
+ xmlNsErr(ctxt, XML_WAR_NS_URI, |
+ "xmlns:%s: '%s' is not a valid URI\n", |
+ attname, URL, NULL); |
+ } else { |
+ if ((ctxt->pedantic) && (uri->scheme == NULL)) { |
+ xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, |
"xmlns:%s: URI %s is not absolute\n", |
- attname, URL); |
+ attname, URL, NULL); |
+ } |
+ xmlFreeURI(uri); |
} |
- xmlFreeURI(uri); |
} |
/* |
@@ -8362,6 +8917,7 @@ reparse: |
xmlErrAttributeDup(ctxt, aprefix, attname); |
else |
if (nsPush(ctxt, attname, URL) > 0) nbNs++; |
+skip_ns: |
if (alloc != 0) xmlFree(attvalue); |
SKIP_BLANKS; |
if (ctxt->input->base != base) goto base_changed; |
@@ -8396,7 +8952,7 @@ reparse: |
xmlFree(attvalue); |
} |
-failed: |
+failed: |
GROW |
if (ctxt->input->base != base) goto base_changed; |
@@ -8427,8 +8983,8 @@ failed: |
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); |
if (defaults != NULL) { |
for (i = 0;i < defaults->nbAttrs;i++) { |
- attname = defaults->values[4 * i]; |
- aprefix = defaults->values[4 * i + 1]; |
+ attname = defaults->values[5 * i]; |
+ aprefix = defaults->values[5 * i + 1]; |
/* |
* special work for namespaces defaulted defs |
@@ -8443,9 +8999,9 @@ failed: |
if (j <= nbNs) continue; |
nsname = xmlGetNamespace(ctxt, NULL); |
- if (nsname != defaults->values[4 * i + 2]) { |
+ if (nsname != defaults->values[5 * i + 2]) { |
if (nsPush(ctxt, NULL, |
- defaults->values[4 * i + 2]) > 0) |
+ defaults->values[5 * i + 2]) > 0) |
nbNs++; |
} |
} else if (aprefix == ctxt->str_xmlns) { |
@@ -8460,7 +9016,7 @@ failed: |
nsname = xmlGetNamespace(ctxt, attname); |
if (nsname != defaults->values[2]) { |
if (nsPush(ctxt, attname, |
- defaults->values[4 * i + 2]) > 0) |
+ defaults->values[5 * i + 2]) > 0) |
nbNs++; |
} |
} else { |
@@ -8486,8 +9042,14 @@ failed: |
atts[nbatts++] = NULL; |
else |
atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); |
- atts[nbatts++] = defaults->values[4 * i + 2]; |
- atts[nbatts++] = defaults->values[4 * i + 3]; |
+ atts[nbatts++] = defaults->values[5 * i + 2]; |
+ atts[nbatts++] = defaults->values[5 * i + 3]; |
+ if ((ctxt->standalone == 1) && |
+ (defaults->values[5 * i + 4] != NULL)) { |
+ xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, |
+ "standalone: attribute %s on %s defaulted from external subset\n", |
+ attname, localname); |
+ } |
nbdef++; |
} |
} |
@@ -8644,6 +9206,8 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, |
*/ |
if (name != (xmlChar*)1) { |
if (name == NULL) name = BAD_CAST "unparseable"; |
+ if ((line == 0) && (ctxt->node != NULL)) |
+ line = ctxt->node->line; |
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, |
"Opening and ending tag mismatch: %s line %d and %s\n", |
ctxt->name, line, name); |
@@ -8863,17 +9427,18 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { |
void |
xmlParseElement(xmlParserCtxtPtr ctxt) { |
const xmlChar *name; |
- const xmlChar *prefix; |
- const xmlChar *URI; |
+ const xmlChar *prefix = NULL; |
+ const xmlChar *URI = NULL; |
xmlParserNodeInfo node_info; |
int line, tlen; |
xmlNodePtr ret; |
int nsNr = ctxt->nsNr; |
- if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) { |
- xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, |
- "Excessive depth in document: change xmlParserMaxDepth = %d\n", |
- xmlParserMaxDepth); |
+ if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && |
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) { |
+ xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, |
+ "Excessive depth in document: %d use XML_PARSE_HUGE option\n", |
+ xmlParserMaxDepth); |
ctxt->instate = XML_PARSER_EOF; |
return; |
} |
@@ -9027,7 +9592,9 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { |
* |
* parse the XML version value. |
* |
- * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ |
+ * [26] VersionNum ::= '1.' [0-9]+ |
+ * |
+ * In practice allow [0-9].[0-9]+ at that level |
* |
* Returns the string giving the XML version number, or NULL |
*/ |
@@ -9044,11 +9611,21 @@ xmlParseVersionNum(xmlParserCtxtPtr ctxt) { |
return(NULL); |
} |
cur = CUR; |
- while (((cur >= 'a') && (cur <= 'z')) || |
- ((cur >= 'A') && (cur <= 'Z')) || |
- ((cur >= '0') && (cur <= '9')) || |
- (cur == '_') || (cur == '.') || |
- (cur == ':') || (cur == '-')) { |
+ if (!((cur >= '0') && (cur <= '9'))) { |
+ xmlFree(buf); |
+ return(NULL); |
+ } |
+ buf[len++] = cur; |
+ NEXT; |
+ cur=CUR; |
+ if (cur != '.') { |
+ xmlFree(buf); |
+ return(NULL); |
+ } |
+ buf[len++] = cur; |
+ NEXT; |
+ cur=CUR; |
+ while ((cur >= '0') && (cur <= '9')) { |
if (len + 1 >= size) { |
xmlChar *tmp; |
@@ -9141,7 +9718,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { |
xmlErrMemory(ctxt, NULL); |
return(NULL); |
} |
- |
+ |
buf[len++] = cur; |
NEXT; |
cur = CUR; |
@@ -9229,6 +9806,18 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { |
if ((encoding != NULL) && |
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || |
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { |
+ /* |
+ * If no encoding was passed to the parser, the declaration |
+ * says UTF-16, and no decoder is present (i.e. the |
+ * document is apparently UTF-8 compatible), then raise an |
+ * encoding mismatch fatal error. |
+ */ |
+ if ((ctxt->encoding == NULL) && |
+ (ctxt->input->buf != NULL) && |
+ (ctxt->input->buf->encoder == NULL)) { |
+ xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, |
+ "Document labelled UTF-16 but has UTF-8 content\n"); |
+ } |
if (ctxt->encoding != NULL) |
xmlFree((xmlChar *) ctxt->encoding); |
ctxt->encoding = encoding; |
@@ -9387,11 +9976,23 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { |
} else { |
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { |
/* |
- * TODO: Blueberry should be detected here |
+ * Changed here for XML-1.0 5th edition |
*/ |
- xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, |
- "Unsupported version '%s'\n", |
- version, NULL); |
+ if (ctxt->options & XML_PARSE_OLD10) { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, |
+ "Unsupported version '%s'\n", |
+ version); |
+ } else { |
+ if ((version[0] == '1') && ((version[1] == '.'))) { |
+ xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, |
+ "Unsupported version '%s'\n", |
+ version, NULL); |
+ } else { |
+ xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, |
+ "Unsupported version '%s'\n", |
+ version); |
+ } |
+ } |
} |
if (ctxt->version != NULL) |
xmlFree((void *) ctxt->version); |
@@ -9426,6 +10027,12 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { |
} |
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); |
} |
+ |
+ /* |
+ * We can grow the input buffer freely at that point |
+ */ |
+ GROW; |
+ |
SKIP_BLANKS; |
ctxt->input->standalone = xmlParseSDDecl(ctxt); |
@@ -9528,8 +10135,13 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { |
/* |
* Check for the XMLDecl in the Prolog. |
+ * Do not GROW here, so that the detected encoder does not decode more |
+ * than just the first line, unless the amount of data is really |
+ * too small to hold "<?xml version="1.0" encoding="foo" |
*/ |
- GROW; |
+ if ((ctxt->input->end - ctxt->input->cur) < 35) { |
+ GROW; |
+ } |
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { |
/* |
@@ -9625,6 +10237,15 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { |
ctxt->myDoc = NULL; |
} |
+ if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { |
+ ctxt->myDoc->properties |= XML_DOC_WELLFORMED; |
+ if (ctxt->valid) |
+ ctxt->myDoc->properties |= XML_DOC_DTDVALID; |
+ if (ctxt->nsWellFormed) |
+ ctxt->myDoc->properties |= XML_DOC_NSVALID; |
+ if (ctxt->options & XML_PARSE_OLD10) |
+ ctxt->myDoc->properties |= XML_DOC_OLD10; |
+ } |
if (! ctxt->wellFormed) { |
ctxt->valid = 0; |
return(-1); |
@@ -10187,8 +10808,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { |
break; |
case XML_PARSER_START_TAG: { |
const xmlChar *name; |
- const xmlChar *prefix; |
- const xmlChar *URI; |
+ const xmlChar *prefix = NULL; |
+ const xmlChar *URI = NULL; |
int nsNr = ctxt->nsNr; |
if ((avail < 2) && (ctxt->inputNr == 1)) |
@@ -10902,6 +11523,7 @@ int |
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, |
int terminate) { |
int end_in_lf = 0; |
+ int remain = 0; |
if (ctxt == NULL) |
return(XML_ERR_INTERNAL_ERROR); |
@@ -10914,12 +11536,65 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, |
end_in_lf = 1; |
size--; |
} |
+ |
+xmldecl_done: |
+ |
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
int cur = ctxt->input->cur - ctxt->input->base; |
int res; |
- |
+ |
+ /* Chromium note: commenting out the following block of code is a gory |
+ * hack, meant to partially undo |
+ * http://git.gnome.org/browse/libxml2/commit/?id=a6c76a |
+ * |
+ * WebKit and libxml disagree about who is responsible for the |
+ * document encoding. |
+ * |
+ * This bug: |
+ * https://bugs.webkit.org/show_bug.cgi?id=30508 |
+ * has links to test cases, libxml bug reports, and mailing list threads |
+ * arguing about it; for now, though, we can just undo the change that |
+ * caused libxml to regress. |
+ */ |
+#if 0 |
+ /* |
+ * Specific handling if we autodetected an encoding, we should not |
+ * push more than the first line ... which depend on the encoding |
+ * And only push the rest once the final encoding was detected |
+ */ |
+ if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && |
+ (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { |
+ unsigned int len = 45; |
+ |
+ if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, |
+ BAD_CAST "UTF-16")) || |
+ (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, |
+ BAD_CAST "UTF16"))) |
+ len = 90; |
+ else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, |
+ BAD_CAST "UCS-4")) || |
+ (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, |
+ BAD_CAST "UCS4"))) |
+ len = 180; |
+ |
+ if (ctxt->input->buf->rawconsumed < len) |
+ len -= ctxt->input->buf->rawconsumed; |
+ |
+ /* |
+ * Change size for reading the initial declaration only |
+ * if size is greater than len. Otherwise, memmove in xmlBufferAdd |
+ * will blindly copy extra bytes from memory. |
+ */ |
+ if (size > len) { |
+ remain = size - len; |
+ size = len; |
+ } else { |
+ remain = 0; |
+ } |
+ } |
+#endif |
res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
if (res < 0) { |
ctxt->errNo = XML_PARSER_EOF; |
@@ -10940,7 +11615,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, |
if ((in->encoder != NULL) && (in->buffer != NULL) && |
(in->raw != NULL)) { |
int nbchars; |
- |
+ |
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); |
if (nbchars < 0) { |
/* TODO 2.6.0 */ |
@@ -10951,13 +11626,23 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, |
} |
} |
} |
- xmlParseTryOrFinish(ctxt, terminate); |
+ if (remain != 0) |
+ xmlParseTryOrFinish(ctxt, 0); |
+ else |
+ xmlParseTryOrFinish(ctxt, terminate); |
+ if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) |
+ return(ctxt->errNo); |
+ |
+ if (remain != 0) { |
+ chunk += size; |
+ size = remain; |
+ remain = 0; |
+ goto xmldecl_done; |
+ } |
if ((end_in_lf == 1) && (ctxt->input != NULL) && |
(ctxt->input->buf != NULL)) { |
xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); |
} |
- if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) |
- return(ctxt->errNo); |
if (terminate) { |
/* |
* Check for termination |
@@ -11269,7 +11954,11 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, |
/* |
* plug some encoding conversion routines here. |
*/ |
- xmlPushInput(ctxt, pinput); |
+ if (xmlPushInput(ctxt, pinput) < 0) { |
+ if (sax != NULL) ctxt->sax = NULL; |
+ xmlFreeParserCtxt(ctxt); |
+ return(NULL); |
+ } |
if (enc != XML_CHAR_ENCODING_NONE) { |
xmlSwitchEncoding(ctxt, enc); |
} |
@@ -11286,6 +11975,11 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, |
*/ |
ctxt->inSubset = 2; |
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); |
+ if (ctxt->myDoc == NULL) { |
+ xmlErrMemory(ctxt, "New Doc failed"); |
+ return(NULL); |
+ } |
+ ctxt->myDoc->properties = XML_DOC_INTERNAL; |
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", |
BAD_CAST "none", BAD_CAST "none"); |
@@ -11398,7 +12092,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, |
/* |
* plug some encoding conversion routines here. |
*/ |
- xmlPushInput(ctxt, input); |
+ if (xmlPushInput(ctxt, input) < 0) { |
+ if (sax != NULL) ctxt->sax = NULL; |
+ xmlFreeParserCtxt(ctxt); |
+ if (systemIdCanonic != NULL) |
+ xmlFree(systemIdCanonic); |
+ return(NULL); |
+ } |
if ((ctxt->input->end - ctxt->input->cur) >= 4) { |
enc = xmlDetectCharEncoding(ctxt->input->cur, 4); |
xmlSwitchEncoding(ctxt, enc); |
@@ -11419,6 +12119,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, |
*/ |
ctxt->inSubset = 2; |
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); |
+ if (ctxt->myDoc == NULL) { |
+ xmlErrMemory(ctxt, "New Doc failed"); |
+ if (sax != NULL) ctxt->sax = NULL; |
+ xmlFreeParserCtxt(ctxt); |
+ return(NULL); |
+ } |
+ ctxt->myDoc->properties = XML_DOC_INTERNAL; |
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", |
ExternalID, SystemID); |
xmlParseExternalSubset(ctxt, ExternalID, SystemID); |
@@ -11445,7 +12152,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, |
} |
if (sax != NULL) ctxt->sax = NULL; |
xmlFreeParserCtxt(ctxt); |
- |
+ |
return(ret); |
} |
@@ -11456,7 +12163,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, |
* @SystemID: a NAME* containing the URL to the DTD |
* |
* Load and parse an external subset. |
- * |
+ * |
* Returns the resulting xmlDtdPtr or NULL in case of error. |
*/ |
@@ -11499,12 +12206,11 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
int ret = 0; |
xmlChar start[4]; |
xmlCharEncoding enc; |
- xmlParserInputPtr inputStream; |
- char *directory = NULL; |
if (ctx == NULL) return(-1); |
- if (ctx->depth > 40) { |
+ if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || |
+ (ctx->depth > 1024)) { |
return(XML_ERR_ENTITY_LOOP); |
} |
@@ -11515,27 +12221,11 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ |
return(-1); |
- ctxt = xmlNewParserCtxt(); |
+ ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); |
if (ctxt == NULL) { |
return(-1); |
} |
- |
- ctxt->userData = ctxt; |
- ctxt->_private = ctx->_private; |
- |
- inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); |
- if (inputStream == NULL) { |
- xmlFreeParserCtxt(ctxt); |
- return(-1); |
- } |
- |
- inputPush(ctxt, inputStream); |
- if ((ctxt->directory == NULL) && (directory == NULL)) |
- directory = xmlParserGetDirectory((char *)URL); |
- if ((ctxt->directory == NULL) && (directory != NULL)) |
- ctxt->directory = directory; |
- |
oldsax = ctxt->sax; |
ctxt->sax = ctx->sax; |
xmlDetectSAX2(ctxt); |
@@ -11544,6 +12234,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
xmlFreeParserCtxt(ctxt); |
return(-1); |
} |
+ newDoc->properties = XML_DOC_INTERNAL; |
if (ctx->myDoc->dict) { |
newDoc->dict = ctx->myDoc->dict; |
xmlDictReference(newDoc->dict); |
@@ -11573,7 +12264,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
newDoc->children->doc = ctx->myDoc; |
} |
- /* |
+ /* |
* Get the 4 first bytes and decode the charset |
* if enc != XML_CHAR_ENCODING_NONE |
* plug some encoding conversion routines. |
@@ -11595,6 +12286,14 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
*/ |
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { |
xmlParseTextDecl(ctxt); |
+ /* |
+ * An XML-1.0 document can't reference an entity that is not XML-1.0 |
+ */ |
+ if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && |
+ (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { |
+ xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, |
+ "Version mismatch between document and entity\n"); |
+ } |
} |
/* |
@@ -11628,7 +12327,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
ctxt->linenumbers = ctx->linenumbers; |
xmlParseContent(ctxt); |
- |
+ |
ctx->validate = ctxt->validate; |
ctx->valid = ctxt->valid; |
if ((RAW == '<') && (NXT(1) == '/')) { |
@@ -11671,7 +12370,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
newDoc->intSubset = NULL; |
newDoc->extSubset = NULL; |
xmlFreeDoc(newDoc); |
- |
+ |
return(ret); |
} |
@@ -11705,12 +12404,12 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
xmlChar start[4]; |
xmlCharEncoding enc; |
- if (depth > 40) { |
+ if (((depth > 40) && |
+ ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || |
+ (depth > 1024)) { |
return(XML_ERR_ENTITY_LOOP); |
} |
- |
- |
if (list != NULL) |
*list = NULL; |
if ((URL == NULL) && (ID == NULL)) |
@@ -11719,7 +12418,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
return(XML_ERR_INTERNAL_ERROR); |
- ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); |
+ ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); |
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); |
ctxt->userData = ctxt; |
if (oldctxt != NULL) { |
@@ -11756,6 +12455,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
xmlFreeParserCtxt(ctxt); |
return(XML_ERR_INTERNAL_ERROR); |
} |
+ newDoc->properties = XML_DOC_INTERNAL; |
newDoc->intSubset = doc->intSubset; |
newDoc->extSubset = doc->extSubset; |
newDoc->dict = doc->dict; |
@@ -11782,7 +12482,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
ctxt->myDoc = doc; |
newRoot->doc = doc; |
- /* |
+ /* |
* Get the 4 first bytes and decode the charset |
* if enc != XML_CHAR_ENCODING_NONE |
* plug some encoding conversion routines. |
@@ -11810,7 +12510,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
ctxt->depth = depth; |
xmlParseContent(ctxt); |
- |
+ |
if ((RAW == '<') && (NXT(1) == '/')) { |
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); |
} else if (RAW != 0) { |
@@ -11843,6 +12543,27 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
} |
ret = XML_ERR_OK; |
} |
+ |
+ /* |
+ * Record in the parent context the number of entity replacements |
+ * done when parsing that reference. |
+ */ |
+ if (oldctxt != NULL) |
+ oldctxt->nbentities += ctxt->nbentities; |
+ |
+ /* |
+ * Also record the size of the entity parsed |
+ */ |
+ if (ctxt->input != NULL) { |
+ oldctxt->sizeentities += ctxt->input->consumed; |
+ oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); |
+ } |
+ /* |
+ * And record the last error if any |
+ */ |
+ if (ctxt->lastError.code != XML_ERR_OK) |
+ xmlCopyError(&ctxt->lastError, &oldctxt->lastError); |
+ |
if (sax != NULL) |
ctxt->sax = oldsax; |
oldctxt->node_seq.maximum = ctxt->node_seq.maximum; |
@@ -11855,7 +12576,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
newDoc->intSubset = NULL; |
newDoc->extSubset = NULL; |
xmlFreeDoc(newDoc); |
- |
+ |
return(ret); |
} |
@@ -11932,9 +12653,9 @@ xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, |
* |
* Returns XML_ERR_OK if the chunk is well balanced, and the parser |
* error code otherwise |
- * |
+ * |
* In case recover is set to 1, the nodelist will not be empty even if |
- * the parsed chunk is not well balanced. |
+ * the parsed chunk is not well balanced. |
*/ |
static xmlParserErrors |
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
@@ -11947,8 +12668,12 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
xmlNodePtr last = NULL; |
int size; |
xmlParserErrors ret = XML_ERR_OK; |
+#ifdef SAX2 |
+ int i; |
+#endif |
- if (oldctxt->depth > 40) { |
+ if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || |
+ (oldctxt->depth > 1024)) { |
return(XML_ERR_ENTITY_LOOP); |
} |
@@ -11972,14 +12697,19 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); |
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); |
- ctxt->nsParent = oldctxt; |
+#ifdef SAX2 |
+ /* propagate namespaces down the entity */ |
+ for (i = 0;i < oldctxt->nsNr;i += 2) { |
+ nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); |
+ } |
+#endif |
oldsax = ctxt->sax; |
ctxt->sax = oldctxt->sax; |
xmlDetectSAX2(ctxt); |
ctxt->replaceEntities = oldctxt->replaceEntities; |
ctxt->options = oldctxt->options; |
- |
+ |
ctxt->_private = oldctxt->_private; |
if (oldctxt->myDoc == NULL) { |
newDoc = xmlNewDoc(BAD_CAST "1.0"); |
@@ -11989,6 +12719,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
xmlFreeParserCtxt(ctxt); |
return(XML_ERR_INTERNAL_ERROR); |
} |
+ newDoc->properties = XML_DOC_INTERNAL; |
newDoc->dict = ctxt->dict; |
xmlDictReference(newDoc->dict); |
ctxt->myDoc = newDoc; |
@@ -12044,7 +12775,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
} else { |
ret = XML_ERR_OK; |
} |
- |
+ |
if ((lst != NULL) && (ret == XML_ERR_OK)) { |
xmlNodePtr cur; |
@@ -12073,7 +12804,20 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
ctxt->myDoc->children = content; |
ctxt->myDoc->last = last; |
} |
- |
+ |
+ /* |
+ * Record in the parent context the number of entity replacements |
+ * done when parsing that reference. |
+ */ |
+ if (oldctxt != NULL) |
+ oldctxt->nbentities += ctxt->nbentities; |
+ |
+ /* |
+ * Also record the last error if any |
+ */ |
+ if (ctxt->lastError.code != XML_ERR_OK) |
+ xmlCopyError(&ctxt->lastError, &oldctxt->lastError); |
+ |
ctxt->sax = oldsax; |
ctxt->dict = NULL; |
ctxt->attsDefault = NULL; |
@@ -12082,7 +12826,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
if (newDoc != NULL) { |
xmlFreeDoc(newDoc); |
} |
- |
+ |
return(ret); |
} |
@@ -12156,22 +12900,22 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, |
if (doc->type == XML_DOCUMENT_NODE) |
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); |
#ifdef LIBXML_HTML_ENABLED |
- else if (doc->type == XML_HTML_DOCUMENT_NODE) |
+ else if (doc->type == XML_HTML_DOCUMENT_NODE) { |
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); |
+ /* |
+ * When parsing in context, it makes no sense to add implied |
+ * elements like html/body/etc... |
+ */ |
+ options |= HTML_PARSE_NOIMPLIED; |
+ } |
#endif |
else |
return(XML_ERR_INTERNAL_ERROR); |
if (ctxt == NULL) |
return(XML_ERR_NO_MEMORY); |
- fake = xmlNewComment(NULL); |
- if (fake == NULL) { |
- xmlFreeParserCtxt(ctxt); |
- return(XML_ERR_NO_MEMORY); |
- } |
- xmlAddChild(node, fake); |
- /* |
+ /* |
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set. |
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict |
* we must wait until the last moment to free the original one. |
@@ -12183,10 +12927,32 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, |
} else |
options |= XML_PARSE_NODICT; |
- xmlCtxtUseOptions(ctxt, options); |
+ if (doc->encoding != NULL) { |
+ xmlCharEncodingHandlerPtr hdlr; |
+ |
+ if (ctxt->encoding != NULL) |
+ xmlFree((xmlChar *) ctxt->encoding); |
+ ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); |
+ |
+ hdlr = xmlFindCharEncodingHandler(doc->encoding); |
+ if (hdlr != NULL) { |
+ xmlSwitchToEncoding(ctxt, hdlr); |
+ } else { |
+ return(XML_ERR_UNSUPPORTED_ENCODING); |
+ } |
+ } |
+ |
+ xmlCtxtUseOptionsInternal(ctxt, options, NULL); |
xmlDetectSAX2(ctxt); |
ctxt->myDoc = doc; |
+ fake = xmlNewComment(NULL); |
+ if (fake == NULL) { |
+ xmlFreeParserCtxt(ctxt); |
+ return(XML_ERR_NO_MEMORY); |
+ } |
+ xmlAddChild(node, fake); |
+ |
if (node->type == XML_ELEMENT_NODE) { |
nodePush(ctxt, node); |
/* |
@@ -12215,7 +12981,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, |
cur = cur->parent; |
} |
ctxt->instate = XML_PARSER_CONTENT; |
- } |
+ } |
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { |
/* |
@@ -12250,12 +13016,12 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, |
} else { |
ret = XML_ERR_OK; |
} |
- |
+ |
/* |
* Return the newly created nodeset after unlinking it from |
* the pseudo sibling. |
*/ |
- |
+ |
cur = fake->next; |
fake->next = NULL; |
node->last = fake; |
@@ -12283,7 +13049,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, |
if (doc->dict != NULL) |
ctxt->dict = NULL; |
xmlFreeParserCtxt(ctxt); |
- |
+ |
return(ret); |
#else /* !SAX2 */ |
return(XML_ERR_INTERNAL_ERROR); |
@@ -12353,6 +13119,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, |
xmlFreeParserCtxt(ctxt); |
return(-1); |
} |
+ newDoc->properties = XML_DOC_INTERNAL; |
if ((doc != NULL) && (doc->dict != NULL)) { |
xmlDictFree(ctxt->dict); |
ctxt->dict = doc->dict; |
@@ -12362,7 +13129,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, |
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); |
ctxt->dictNames = 1; |
} else { |
- xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT); |
+ xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); |
} |
if (doc != NULL) { |
newDoc->intSubset = doc->intSubset; |
@@ -12425,7 +13192,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, |
} else { |
ret = 0; |
} |
- |
+ |
if ((lst != NULL) && ((ret == 0) || (recover == 1))) { |
xmlNodePtr cur; |
@@ -12442,15 +13209,15 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, |
} |
newDoc->children->children = NULL; |
} |
- |
- if (sax != NULL) |
+ |
+ if (sax != NULL) |
ctxt->sax = oldsax; |
xmlFreeParserCtxt(ctxt); |
newDoc->intSubset = NULL; |
newDoc->extSubset = NULL; |
newDoc->oldNs = NULL; |
xmlFreeDoc(newDoc); |
- |
+ |
return(ret); |
} |
@@ -12498,7 +13265,7 @@ xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { |
if (sax != NULL) |
ctxt->sax = NULL; |
xmlFreeParserCtxt(ctxt); |
- |
+ |
return(ret); |
} |
@@ -12522,10 +13289,11 @@ xmlParseEntity(const char *filename) { |
#endif /* LIBXML_SAX1_ENABLED */ |
/** |
- * xmlCreateEntityParserCtxt: |
+ * xmlCreateEntityParserCtxtInternal: |
* @URL: the entity URL |
* @ID: the entity PUBLIC ID |
* @base: a possible base for the target URI |
+ * @pctx: parser context used to set options on new context |
* |
* Create a parser context for an external entity |
* Automatic support for ZLIB/Compress compressed document is provided |
@@ -12533,19 +13301,24 @@ xmlParseEntity(const char *filename) { |
* |
* Returns the new parser context or NULL |
*/ |
-xmlParserCtxtPtr |
-xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, |
- const xmlChar *base) { |
+static xmlParserCtxtPtr |
+xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, |
+ const xmlChar *base, xmlParserCtxtPtr pctx) { |
xmlParserCtxtPtr ctxt; |
xmlParserInputPtr inputStream; |
char *directory = NULL; |
xmlChar *uri; |
- |
+ |
ctxt = xmlNewParserCtxt(); |
if (ctxt == NULL) { |
return(NULL); |
} |
+ if (pctx != NULL) { |
+ ctxt->options = pctx->options; |
+ ctxt->_private = pctx->_private; |
+ } |
+ |
uri = xmlBuildURI(URL, base); |
if (uri == NULL) { |
@@ -12580,9 +13353,28 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, |
return(ctxt); |
} |
+/** |
+ * xmlCreateEntityParserCtxt: |
+ * @URL: the entity URL |
+ * @ID: the entity PUBLIC ID |
+ * @base: a possible base for the target URI |
+ * |
+ * Create a parser context for an external entity, with no parent parser |
+ * context to inherit options from. Automatic support for ZLIB/Compress |
+ * compressed documents is provided by default if found at compile-time. |
+ * |
+ * Returns the new parser context or NULL |
+ */ |
+xmlParserCtxtPtr |
+xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, |
+ const xmlChar *base) { |
+ return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); |
+ |
+} |
+ |
/************************************************************************ |
* * |
- * Front ends when parsing from a file * |
+ * Front ends when parsing from a file * |
* * |
************************************************************************/ |
@@ -12611,9 +13403,9 @@ xmlCreateURLParserCtxt(const char *filename, int options) |
} |
if (options) |
- xmlCtxtUseOptions(ctxt, options); |
+ xmlCtxtUseOptionsInternal(ctxt, options, NULL); |
ctxt->linenumbers = 1; |
- |
+ |
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); |
if (inputStream == NULL) { |
xmlFreeParserCtxt(ctxt); |
@@ -12748,7 +13540,7 @@ xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, |
*/ |
xmlDocPtr |
-xmlRecoverDoc(xmlChar *cur) { |
+xmlRecoverDoc(const xmlChar *cur) { |
return(xmlSAXParseDoc(NULL, cur, 1)); |
} |
@@ -12950,6 +13742,8 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, |
xmlDocPtr ret; |
xmlParserCtxtPtr ctxt; |
+ xmlInitParser(); |
+ |
ctxt = xmlCreateMemoryParserCtxt(buffer, size); |
if (ctxt == NULL) return(NULL); |
if (sax != NULL) { |
@@ -12975,7 +13769,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, |
if (sax != NULL) |
ctxt->sax = NULL; |
xmlFreeParserCtxt(ctxt); |
- |
+ |
return(ret); |
} |
@@ -13038,14 +13832,16 @@ xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { |
* |
* A better SAX parsing routine. |
* parse an XML in-memory buffer and call the given SAX handler routines. |
- * |
+ * |
* Returns 0 in case of success or a error number otherwise |
*/ |
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, |
const char *buffer, int size) { |
int ret = 0; |
xmlParserCtxtPtr ctxt; |
- |
+ |
+ xmlInitParser(); |
+ |
ctxt = xmlCreateMemoryParserCtxt(buffer, size); |
if (ctxt == NULL) return -1; |
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) |
@@ -13055,7 +13851,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, |
if (user_data != NULL) |
ctxt->userData = user_data; |
- |
+ |
xmlParseDocument(ctxt); |
if (ctxt->wellFormed) |
@@ -13229,11 +14025,11 @@ xmlInitParser(void) { |
__xmlGlobalInitMutexLock(); |
if (xmlParserInitialized == 0) { |
#endif |
+ xmlInitGlobals(); |
+ xmlInitThreads(); |
if ((xmlGenericError == xmlGenericErrorDefaultFunc) || |
(xmlGenericError == NULL)) |
initGenericErrorDefaultFunc(NULL); |
- xmlInitGlobals(); |
- xmlInitThreads(); |
xmlInitMemory(); |
xmlInitCharEncodingHandlers(); |
xmlDefaultSAXHandlerInit(); |
@@ -13267,6 +14063,14 @@ xmlInitParser(void) { |
* the library and all XML/HTML documents built with it. |
* See also xmlInitParser() which has the opposite function of preparing |
* the library for operations. |
+ * |
+ * WARNING: if your application is multithreaded or has plugin support, |
+ * calling this may crash the application if another thread or |
+ * a plugin is still using libxml2. It's sometimes very hard to |
+ * guess if libxml2 is in use in the application, since some libraries |
+ * or plugins may use it without notice. In case of doubt, abstain |
+ * from calling this function or do it just before calling exit() |
+ * to avoid leak reports from valgrind! |
*/ |
void |
@@ -13389,6 +14193,8 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) |
ctxt->depth = 0; |
ctxt->charset = XML_CHAR_ENCODING_UTF8; |
ctxt->catalogs = NULL; |
+ ctxt->nbentities = 0; |
+ ctxt->sizeentities = 0; |
xmlInitNodeInfoSeq(&ctxt->node_seq); |
if (ctxt->attsDefault != NULL) { |
@@ -13500,6 +14306,10 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, |
if (encoding != NULL) { |
xmlCharEncodingHandlerPtr hdlr; |
+ if (ctxt->encoding != NULL) |
+ xmlFree((xmlChar *) ctxt->encoding); |
+ ctxt->encoding = xmlStrdup((const xmlChar *) encoding); |
+ |
hdlr = xmlFindCharEncodingHandler(encoding); |
if (hdlr != NULL) { |
xmlSwitchToEncoding(ctxt, hdlr); |
@@ -13514,50 +14324,63 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, |
return(0); |
} |
+ |
/** |
- * xmlCtxtUseOptions: |
+ * xmlCtxtUseOptionsInternal: |
* @ctxt: an XML parser context |
* @options: a combination of xmlParserOption |
+ * @encoding: the user provided encoding to use |
* |
* Applies the options to the parser context |
* |
* Returns 0 in case of success, the set of unknown or unimplemented options |
* in case of error. |
*/ |
-int |
-xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) |
+static int |
+xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) |
{ |
if (ctxt == NULL) |
return(-1); |
+ if (encoding != NULL) { |
+ if (ctxt->encoding != NULL) |
+ xmlFree((xmlChar *) ctxt->encoding); |
+ ctxt->encoding = xmlStrdup((const xmlChar *) encoding); |
+ } |
if (options & XML_PARSE_RECOVER) { |
ctxt->recovery = 1; |
options -= XML_PARSE_RECOVER; |
+ ctxt->options |= XML_PARSE_RECOVER; |
} else |
ctxt->recovery = 0; |
if (options & XML_PARSE_DTDLOAD) { |
ctxt->loadsubset = XML_DETECT_IDS; |
options -= XML_PARSE_DTDLOAD; |
+ ctxt->options |= XML_PARSE_DTDLOAD; |
} else |
ctxt->loadsubset = 0; |
if (options & XML_PARSE_DTDATTR) { |
ctxt->loadsubset |= XML_COMPLETE_ATTRS; |
options -= XML_PARSE_DTDATTR; |
+ ctxt->options |= XML_PARSE_DTDATTR; |
} |
if (options & XML_PARSE_NOENT) { |
ctxt->replaceEntities = 1; |
/* ctxt->loadsubset |= XML_DETECT_IDS; */ |
options -= XML_PARSE_NOENT; |
+ ctxt->options |= XML_PARSE_NOENT; |
} else |
ctxt->replaceEntities = 0; |
if (options & XML_PARSE_PEDANTIC) { |
ctxt->pedantic = 1; |
options -= XML_PARSE_PEDANTIC; |
+ ctxt->options |= XML_PARSE_PEDANTIC; |
} else |
ctxt->pedantic = 0; |
if (options & XML_PARSE_NOBLANKS) { |
ctxt->keepBlanks = 0; |
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; |
options -= XML_PARSE_NOBLANKS; |
+ ctxt->options |= XML_PARSE_NOBLANKS; |
} else |
ctxt->keepBlanks = 1; |
if (options & XML_PARSE_DTDVALID) { |
@@ -13567,6 +14390,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) |
if (options & XML_PARSE_NOERROR) |
ctxt->vctxt.error = NULL; |
options -= XML_PARSE_DTDVALID; |
+ ctxt->options |= XML_PARSE_DTDVALID; |
} else |
ctxt->validate = 0; |
if (options & XML_PARSE_NOWARNING) { |
@@ -13586,17 +14410,20 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) |
ctxt->sax->endElementNs = NULL; |
ctxt->sax->initialized = 1; |
options -= XML_PARSE_SAX1; |
+ ctxt->options |= XML_PARSE_SAX1; |
} |
#endif /* LIBXML_SAX1_ENABLED */ |
if (options & XML_PARSE_NODICT) { |
ctxt->dictNames = 0; |
options -= XML_PARSE_NODICT; |
+ ctxt->options |= XML_PARSE_NODICT; |
} else { |
ctxt->dictNames = 1; |
} |
if (options & XML_PARSE_NOCDATA) { |
ctxt->sax->cdataBlock = NULL; |
options -= XML_PARSE_NOCDATA; |
+ ctxt->options |= XML_PARSE_NOCDATA; |
} |
if (options & XML_PARSE_NSCLEAN) { |
ctxt->options |= XML_PARSE_NSCLEAN; |
@@ -13610,11 +14437,43 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) |
ctxt->options |= XML_PARSE_COMPACT; |
options -= XML_PARSE_COMPACT; |
} |
+ if (options & XML_PARSE_OLD10) { |
+ ctxt->options |= XML_PARSE_OLD10; |
+ options -= XML_PARSE_OLD10; |
+ } |
+ if (options & XML_PARSE_NOBASEFIX) { |
+ ctxt->options |= XML_PARSE_NOBASEFIX; |
+ options -= XML_PARSE_NOBASEFIX; |
+ } |
+ if (options & XML_PARSE_HUGE) { |
+ ctxt->options |= XML_PARSE_HUGE; |
+ options -= XML_PARSE_HUGE; |
+ } |
+ if (options & XML_PARSE_OLDSAX) { |
+ ctxt->options |= XML_PARSE_OLDSAX; |
+ options -= XML_PARSE_OLDSAX; |
+ } |
ctxt->linenumbers = 1; |
return (options); |
} |
/** |
+ * xmlCtxtUseOptions: |
+ * @ctxt: an XML parser context |
+ * @options: a combination of xmlParserOption |
+ * |
+ * Applies the options to the parser context, with no user provided encoding |
+ * |
+ * Returns 0 in case of success, -1 if @ctxt is NULL, or the set of unknown |
+ * or unimplemented options otherwise. |
+ */ |
+int |
+xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) |
+{ |
+ return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); |
+} |
+ |
+/** |
* xmlDoRead: |
* @ctxt: an XML parser context |
* @URL: the base URL to use for the document |
@@ -13623,7 +14482,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) |
* @reuse: keep the context for reuse |
* |
* Common front-end for the xmlRead functions |
- * |
+ * |
* Returns the resulting document tree or NULL |
*/ |
static xmlDocPtr |
@@ -13631,8 +14490,8 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, |
int options, int reuse) |
{ |
xmlDocPtr ret; |
- |
- xmlCtxtUseOptions(ctxt, options); |
+ |
+ xmlCtxtUseOptionsInternal(ctxt, options, encoding); |
if (encoding != NULL) { |
xmlCharEncodingHandlerPtr hdlr; |