| Index: third_party/libxml/src/parser.c
|
| diff --git a/third_party/libxml/src/parser.c b/third_party/libxml/src/parser.c
|
| index 6ee55378f3fc4c125d923422f142980787004a3e..1d9396786ba7eca5d8e985d18e2679398243acd4 100644
|
| --- a/third_party/libxml/src/parser.c
|
| +++ b/third_party/libxml/src/parser.c
|
| @@ -17,7 +17,7 @@
|
| * parserInternals.c to reduce this file size.
|
| * As much as possible the functions are associated with their relative
|
| * production in the XML specification. A few productions defining the
|
| - * different ranges of character are actually implanted either in
|
| + * different ranges of character are actually implanted either in
|
| * parserInternals.h or parserInternals.c
|
| * The DOM tree build is realized from the default SAX callbacks in
|
| * the module SAX.c.
|
| @@ -40,6 +40,7 @@
|
| #endif
|
|
|
| #include <stdlib.h>
|
| +#include <limits.h>
|
| #include <string.h>
|
| #include <stdarg.h>
|
| #include <libxml/xmlmemory.h>
|
| @@ -79,6 +80,12 @@
|
| #ifdef HAVE_ZLIB_H
|
| #include <zlib.h>
|
| #endif
|
| +#ifdef HAVE_LZMA_H
|
| +#include <lzma.h>
|
| +#endif
|
| +
|
| +#include "buf.h"
|
| +#include "enc.h"
|
|
|
| static void
|
| xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
|
| @@ -114,16 +121,56 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
|
| * parser option.
|
| */
|
| static int
|
| -xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
|
| - xmlEntityPtr ent)
|
| +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
|
| + xmlEntityPtr ent, size_t replacement)
|
| {
|
| - unsigned long consumed = 0;
|
| + size_t consumed = 0;
|
|
|
| if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
|
| return (0);
|
| if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
|
| return (1);
|
| - if (size != 0) {
|
| +
|
| + /*
|
| + * This may look absurd but is needed to detect
|
| + * entity problems
|
| + */
|
| + if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
|
| + (ent->content != NULL) && (ent->checked == 0)) {
|
| + unsigned long oldnbent = ctxt->nbentities;
|
| + xmlChar *rep;
|
| +
|
| + ent->checked = 1;
|
| +
|
| + rep = xmlStringDecodeEntities(ctxt, ent->content,
|
| + XML_SUBSTITUTE_REF, 0, 0, 0);
|
| +
|
| + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
|
| + if (rep != NULL) {
|
| + if (xmlStrchr(rep, '<'))
|
| + ent->checked |= 1;
|
| + xmlFree(rep);
|
| + rep = NULL;
|
| + }
|
| + }
|
| + if (replacement != 0) {
|
| + if (replacement < XML_MAX_TEXT_LENGTH)
|
| + return(0);
|
| +
|
| + /*
|
| + * If the volume of entity copy reaches 10 times the
|
| + * amount of parsed data and over the large text threshold
|
| + * then that's very likely to be an abuse.
|
| + */
|
| + if (ctxt->input != NULL) {
|
| + consumed = ctxt->input->consumed +
|
| + (ctxt->input->cur - ctxt->input->base);
|
| + }
|
| + consumed += ctxt->sizeentities;
|
| +
|
| + if (replacement < XML_PARSER_NON_LINEAR * consumed)
|
| + return(0);
|
| + } else if (size != 0) {
|
| /*
|
| * Do the check based on the replacement size of the entity
|
| */
|
| @@ -146,7 +193,7 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
|
| /*
|
| * use the number of parsed entities in the replacement
|
| */
|
| - size = ent->checked;
|
| + size = ent->checked / 2;
|
|
|
| /*
|
| * The amount of data parsed counting entities size only once
|
| @@ -165,11 +212,13 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
|
| return (0);
|
| } else {
|
| /*
|
| - * strange we got no data for checking just return
|
| + * strange we got no data for checking
|
| */
|
| - return (0);
|
| + if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
|
| + (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
|
| + (ctxt->nbentities <= 10000))
|
| + return (0);
|
| }
|
| -
|
| xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
|
| return (1);
|
| }
|
| @@ -191,12 +240,24 @@ unsigned int xmlParserMaxDepth = 256;
|
| #define XML_PARSER_BUFFER_SIZE 100
|
| #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
|
|
|
| +/**
|
| + * XML_PARSER_CHUNK_SIZE
|
| + *
|
| + * When calling GROW that's the minimal amount of data
|
| + * the parser expects to have received. It is not a hard
|
| + * limit but an optimization when reading strings like Names.
|
| + * It is not strictly needed as long as the input's available characters
|
| + * are followed by 0, which should be provided by the I/O level
|
| + */
|
| +#define XML_PARSER_CHUNK_SIZE 100
|
| +
|
| /*
|
| * List of XML prefixed PI allowed by W3C specs
|
| */
|
|
|
| static const char *xmlW3CPIs[] = {
|
| "xml-stylesheet",
|
| + "xml-model",
|
| NULL
|
| };
|
|
|
| @@ -229,7 +290,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
|
|
|
| /************************************************************************
|
| * *
|
| - * Some factorized error routines *
|
| + * Some factorized error routines *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -281,193 +342,201 @@ static void
|
| xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
|
| {
|
| const char *errmsg;
|
| + char errstr[129] = "";
|
|
|
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
|
| (ctxt->instate == XML_PARSER_EOF))
|
| return;
|
| switch (error) {
|
| case XML_ERR_INVALID_HEX_CHARREF:
|
| - errmsg = "CharRef: invalid hexadecimal value\n";
|
| + errmsg = "CharRef: invalid hexadecimal value";
|
| break;
|
| case XML_ERR_INVALID_DEC_CHARREF:
|
| - errmsg = "CharRef: invalid decimal value\n";
|
| + errmsg = "CharRef: invalid decimal value";
|
| break;
|
| case XML_ERR_INVALID_CHARREF:
|
| - errmsg = "CharRef: invalid value\n";
|
| + errmsg = "CharRef: invalid value";
|
| break;
|
| case XML_ERR_INTERNAL_ERROR:
|
| errmsg = "internal error";
|
| break;
|
| case XML_ERR_PEREF_AT_EOF:
|
| - errmsg = "PEReference at end of document\n";
|
| + errmsg = "PEReference at end of document";
|
| break;
|
| case XML_ERR_PEREF_IN_PROLOG:
|
| - errmsg = "PEReference in prolog\n";
|
| + errmsg = "PEReference in prolog";
|
| break;
|
| case XML_ERR_PEREF_IN_EPILOG:
|
| - errmsg = "PEReference in epilog\n";
|
| + errmsg = "PEReference in epilog";
|
| break;
|
| case XML_ERR_PEREF_NO_NAME:
|
| - errmsg = "PEReference: no name\n";
|
| + errmsg = "PEReference: no name";
|
| break;
|
| case XML_ERR_PEREF_SEMICOL_MISSING:
|
| - errmsg = "PEReference: expecting ';'\n";
|
| + errmsg = "PEReference: expecting ';'";
|
| break;
|
| case XML_ERR_ENTITY_LOOP:
|
| - errmsg = "Detected an entity reference loop\n";
|
| + errmsg = "Detected an entity reference loop";
|
| break;
|
| case XML_ERR_ENTITY_NOT_STARTED:
|
| - errmsg = "EntityValue: \" or ' expected\n";
|
| + errmsg = "EntityValue: \" or ' expected";
|
| break;
|
| case XML_ERR_ENTITY_PE_INTERNAL:
|
| - errmsg = "PEReferences forbidden in internal subset\n";
|
| + errmsg = "PEReferences forbidden in internal subset";
|
| break;
|
| case XML_ERR_ENTITY_NOT_FINISHED:
|
| - errmsg = "EntityValue: \" or ' expected\n";
|
| + errmsg = "EntityValue: \" or ' expected";
|
| break;
|
| case XML_ERR_ATTRIBUTE_NOT_STARTED:
|
| - errmsg = "AttValue: \" or ' expected\n";
|
| + errmsg = "AttValue: \" or ' expected";
|
| break;
|
| case XML_ERR_LT_IN_ATTRIBUTE:
|
| - errmsg = "Unescaped '<' not allowed in attributes values\n";
|
| + errmsg = "Unescaped '<' not allowed in attributes values";
|
| break;
|
| case XML_ERR_LITERAL_NOT_STARTED:
|
| - errmsg = "SystemLiteral \" or ' expected\n";
|
| + errmsg = "SystemLiteral \" or ' expected";
|
| break;
|
| case XML_ERR_LITERAL_NOT_FINISHED:
|
| - errmsg = "Unfinished System or Public ID \" or ' expected\n";
|
| + errmsg = "Unfinished System or Public ID \" or ' expected";
|
| break;
|
| case XML_ERR_MISPLACED_CDATA_END:
|
| - errmsg = "Sequence ']]>' not allowed in content\n";
|
| + errmsg = "Sequence ']]>' not allowed in content";
|
| break;
|
| case XML_ERR_URI_REQUIRED:
|
| - errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
|
| + errmsg = "SYSTEM or PUBLIC, the URI is missing";
|
| break;
|
| case XML_ERR_PUBID_REQUIRED:
|
| - errmsg = "PUBLIC, the Public Identifier is missing\n";
|
| + errmsg = "PUBLIC, the Public Identifier is missing";
|
| break;
|
| case XML_ERR_HYPHEN_IN_COMMENT:
|
| - errmsg = "Comment must not contain '--' (double-hyphen)\n";
|
| + errmsg = "Comment must not contain '--' (double-hyphen)";
|
| break;
|
| case XML_ERR_PI_NOT_STARTED:
|
| - errmsg = "xmlParsePI : no target name\n";
|
| + errmsg = "xmlParsePI : no target name";
|
| break;
|
| case XML_ERR_RESERVED_XML_NAME:
|
| - errmsg = "Invalid PI name\n";
|
| + errmsg = "Invalid PI name";
|
| break;
|
| case XML_ERR_NOTATION_NOT_STARTED:
|
| - errmsg = "NOTATION: Name expected here\n";
|
| + errmsg = "NOTATION: Name expected here";
|
| break;
|
| case XML_ERR_NOTATION_NOT_FINISHED:
|
| - errmsg = "'>' required to close NOTATION declaration\n";
|
| + errmsg = "'>' required to close NOTATION declaration";
|
| break;
|
| case XML_ERR_VALUE_REQUIRED:
|
| - errmsg = "Entity value required\n";
|
| + errmsg = "Entity value required";
|
| break;
|
| case XML_ERR_URI_FRAGMENT:
|
| errmsg = "Fragment not allowed";
|
| break;
|
| case XML_ERR_ATTLIST_NOT_STARTED:
|
| - errmsg = "'(' required to start ATTLIST enumeration\n";
|
| + errmsg = "'(' required to start ATTLIST enumeration";
|
| break;
|
| case XML_ERR_NMTOKEN_REQUIRED:
|
| - errmsg = "NmToken expected in ATTLIST enumeration\n";
|
| + errmsg = "NmToken expected in ATTLIST enumeration";
|
| break;
|
| case XML_ERR_ATTLIST_NOT_FINISHED:
|
| - errmsg = "')' required to finish ATTLIST enumeration\n";
|
| + errmsg = "')' required to finish ATTLIST enumeration";
|
| break;
|
| case XML_ERR_MIXED_NOT_STARTED:
|
| - errmsg = "MixedContentDecl : '|' or ')*' expected\n";
|
| + errmsg = "MixedContentDecl : '|' or ')*' expected";
|
| break;
|
| case XML_ERR_PCDATA_REQUIRED:
|
| - errmsg = "MixedContentDecl : '#PCDATA' expected\n";
|
| + errmsg = "MixedContentDecl : '#PCDATA' expected";
|
| break;
|
| case XML_ERR_ELEMCONTENT_NOT_STARTED:
|
| - errmsg = "ContentDecl : Name or '(' expected\n";
|
| + errmsg = "ContentDecl : Name or '(' expected";
|
| break;
|
| case XML_ERR_ELEMCONTENT_NOT_FINISHED:
|
| - errmsg = "ContentDecl : ',' '|' or ')' expected\n";
|
| + errmsg = "ContentDecl : ',' '|' or ')' expected";
|
| break;
|
| case XML_ERR_PEREF_IN_INT_SUBSET:
|
| errmsg =
|
| - "PEReference: forbidden within markup decl in internal subset\n";
|
| + "PEReference: forbidden within markup decl in internal subset";
|
| break;
|
| case XML_ERR_GT_REQUIRED:
|
| - errmsg = "expected '>'\n";
|
| + errmsg = "expected '>'";
|
| break;
|
| case XML_ERR_CONDSEC_INVALID:
|
| - errmsg = "XML conditional section '[' expected\n";
|
| + errmsg = "XML conditional section '[' expected";
|
| break;
|
| case XML_ERR_EXT_SUBSET_NOT_FINISHED:
|
| - errmsg = "Content error in the external subset\n";
|
| + errmsg = "Content error in the external subset";
|
| break;
|
| case XML_ERR_CONDSEC_INVALID_KEYWORD:
|
| errmsg =
|
| - "conditional section INCLUDE or IGNORE keyword expected\n";
|
| + "conditional section INCLUDE or IGNORE keyword expected";
|
| break;
|
| case XML_ERR_CONDSEC_NOT_FINISHED:
|
| - errmsg = "XML conditional section not closed\n";
|
| + errmsg = "XML conditional section not closed";
|
| break;
|
| case XML_ERR_XMLDECL_NOT_STARTED:
|
| - errmsg = "Text declaration '<?xml' required\n";
|
| + errmsg = "Text declaration '<?xml' required";
|
| break;
|
| case XML_ERR_XMLDECL_NOT_FINISHED:
|
| - errmsg = "parsing XML declaration: '?>' expected\n";
|
| + errmsg = "parsing XML declaration: '?>' expected";
|
| break;
|
| case XML_ERR_EXT_ENTITY_STANDALONE:
|
| - errmsg = "external parsed entities cannot be standalone\n";
|
| + errmsg = "external parsed entities cannot be standalone";
|
| break;
|
| case XML_ERR_ENTITYREF_SEMICOL_MISSING:
|
| - errmsg = "EntityRef: expecting ';'\n";
|
| + errmsg = "EntityRef: expecting ';'";
|
| break;
|
| case XML_ERR_DOCTYPE_NOT_FINISHED:
|
| - errmsg = "DOCTYPE improperly terminated\n";
|
| + errmsg = "DOCTYPE improperly terminated";
|
| break;
|
| case XML_ERR_LTSLASH_REQUIRED:
|
| - errmsg = "EndTag: '</' not found\n";
|
| + errmsg = "EndTag: '</' not found";
|
| break;
|
| case XML_ERR_EQUAL_REQUIRED:
|
| - errmsg = "expected '='\n";
|
| + errmsg = "expected '='";
|
| break;
|
| case XML_ERR_STRING_NOT_CLOSED:
|
| - errmsg = "String not closed expecting \" or '\n";
|
| + errmsg = "String not closed expecting \" or '";
|
| break;
|
| case XML_ERR_STRING_NOT_STARTED:
|
| - errmsg = "String not started expecting ' or \"\n";
|
| + errmsg = "String not started expecting ' or \"";
|
| break;
|
| case XML_ERR_ENCODING_NAME:
|
| - errmsg = "Invalid XML encoding name\n";
|
| + errmsg = "Invalid XML encoding name";
|
| break;
|
| case XML_ERR_STANDALONE_VALUE:
|
| - errmsg = "standalone accepts only 'yes' or 'no'\n";
|
| + errmsg = "standalone accepts only 'yes' or 'no'";
|
| break;
|
| case XML_ERR_DOCUMENT_EMPTY:
|
| - errmsg = "Document is empty\n";
|
| + errmsg = "Document is empty";
|
| break;
|
| case XML_ERR_DOCUMENT_END:
|
| - errmsg = "Extra content at the end of the document\n";
|
| + errmsg = "Extra content at the end of the document";
|
| break;
|
| case XML_ERR_NOT_WELL_BALANCED:
|
| - errmsg = "chunk is not well balanced\n";
|
| + errmsg = "chunk is not well balanced";
|
| break;
|
| case XML_ERR_EXTRA_CONTENT:
|
| - errmsg = "extra content at the end of well balanced chunk\n";
|
| + errmsg = "extra content at the end of well balanced chunk";
|
| break;
|
| case XML_ERR_VERSION_MISSING:
|
| - errmsg = "Malformed declaration expecting version\n";
|
| + errmsg = "Malformed declaration expecting version";
|
| + break;
|
| + case XML_ERR_NAME_TOO_LONG:
|
| + errmsg = "Name too long use XML_PARSE_HUGE option";
|
| break;
|
| #if 0
|
| case:
|
| - errmsg = "\n";
|
| + errmsg = "";
|
| break;
|
| #endif
|
| default:
|
| - errmsg = "Unregistered error message\n";
|
| + errmsg = "Unregistered error message";
|
| }
|
| + if (info == NULL)
|
| + snprintf(errstr, 128, "%s\n", errmsg);
|
| + else
|
| + snprintf(errstr, 128, "%s: %%s\n", errmsg);
|
| if (ctxt != NULL)
|
| ctxt->errNo = error;
|
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
|
| - XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
|
| + XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
|
| info);
|
| if (ctxt != NULL) {
|
| ctxt->wellFormed = 0;
|
| @@ -622,7 +691,7 @@ xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| */
|
| static void
|
| xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| - const char *msg, const xmlChar *str1, int val,
|
| + const char *msg, const xmlChar *str1, int val,
|
| const xmlChar *str2)
|
| {
|
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
|
| @@ -731,7 +800,7 @@ xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| * @info1: extra information string
|
| * @info2: extra information string
|
| *
|
| - * Handle a fatal parser error, i.e. violating Well-Formedness constraints
|
| + * Handle a namespace warning
|
| */
|
| static void
|
| xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
| @@ -750,7 +819,7 @@ xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
|
|
| /************************************************************************
|
| * *
|
| - * Library wide options *
|
| + * Library wide options *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -954,6 +1023,12 @@ xmlHasFeature(xmlFeature feature)
|
| #else
|
| return(0);
|
| #endif
|
| + case XML_WITH_LZMA:
|
| +#ifdef LIBXML_LZMA_ENABLED
|
| + return(1);
|
| +#else
|
| + return(0);
|
| +#endif
|
| case XML_WITH_ICU:
|
| #ifdef LIBXML_ICU_ENABLED
|
| return(1);
|
| @@ -968,7 +1043,7 @@ xmlHasFeature(xmlFeature feature)
|
|
|
| /************************************************************************
|
| * *
|
| - * SAX2 defaulted attributes handling *
|
| + * SAX2 defaulted attributes handling *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -992,8 +1067,8 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
|
| ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
|
| ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
|
| ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
|
| - if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
|
| - (ctxt->str_xml_ns == NULL)) {
|
| + if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
|
| + (ctxt->str_xml_ns == NULL)) {
|
| xmlErrMemory(ctxt, NULL);
|
| }
|
| }
|
| @@ -1303,60 +1378,182 @@ xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
|
| * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
|
| * [38] Subcode ::= ([a-z] | [A-Z])+
|
| *
|
| + * The current REC references the successors of RFC 1766, currently RFC 5646
|
| + *
|
| + * http://www.rfc-editor.org/rfc/rfc5646.txt
|
| + * langtag = language
|
| + * ["-" script]
|
| + * ["-" region]
|
| + * *("-" variant)
|
| + * *("-" extension)
|
| + * ["-" privateuse]
|
| + * language = 2*3ALPHA ; shortest ISO 639 code
|
| + * ["-" extlang] ; sometimes followed by
|
| + * ; extended language subtags
|
| + * / 4ALPHA ; or reserved for future use
|
| + * / 5*8ALPHA ; or registered language subtag
|
| + *
|
| + * extlang = 3ALPHA ; selected ISO 639 codes
|
| + * *2("-" 3ALPHA) ; permanently reserved
|
| + *
|
| + * script = 4ALPHA ; ISO 15924 code
|
| + *
|
| + * region = 2ALPHA ; ISO 3166-1 code
|
| + * / 3DIGIT ; UN M.49 code
|
| + *
|
| + * variant = 5*8alphanum ; registered variants
|
| + * / (DIGIT 3alphanum)
|
| + *
|
| + * extension = singleton 1*("-" (2*8alphanum))
|
| + *
|
| + * ; Single alphanumerics
|
| + * ; "x" reserved for private use
|
| + * singleton = DIGIT ; 0 - 9
|
| + * / %x41-57 ; A - W
|
| + * / %x59-5A ; Y - Z
|
| + * / %x61-77 ; a - w
|
| + * / %x79-7A ; y - z
|
| + *
|
| + * it sounds right to still allow Irregular i-xxx IANA and user codes too
|
| + * The parser below doesn't try to cope with extension or privateuse
|
| + * that could be added but that's not interoperable anyway
|
| + *
|
| * Returns 1 if correct 0 otherwise
|
| **/
|
| int
|
| xmlCheckLanguageID(const xmlChar * lang)
|
| {
|
| - const xmlChar *cur = lang;
|
| + const xmlChar *cur = lang, *nxt;
|
|
|
| if (cur == NULL)
|
| return (0);
|
| if (((cur[0] == 'i') && (cur[1] == '-')) ||
|
| - ((cur[0] == 'I') && (cur[1] == '-'))) {
|
| + ((cur[0] == 'I') && (cur[1] == '-')) ||
|
| + ((cur[0] == 'x') && (cur[1] == '-')) ||
|
| + ((cur[0] == 'X') && (cur[1] == '-'))) {
|
| /*
|
| - * IANA code
|
| + * Still allow IANA code and user code which were coming
|
| + * from the previous version of the XML-1.0 specification
|
| + * it's deprecated but we should not fail
|
| */
|
| cur += 2;
|
| - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
|
| + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
|
| ((cur[0] >= 'a') && (cur[0] <= 'z')))
|
| cur++;
|
| - } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
|
| - ((cur[0] == 'X') && (cur[1] == '-'))) {
|
| - /*
|
| - * User code
|
| - */
|
| - cur += 2;
|
| - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
|
| - ((cur[0] >= 'a') && (cur[0] <= 'z')))
|
| - cur++;
|
| - } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
|
| - ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
|
| + return(cur[0] == 0);
|
| + }
|
| + nxt = cur;
|
| + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
|
| + ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
|
| + nxt++;
|
| + if (nxt - cur >= 4) {
|
| /*
|
| - * ISO639
|
| + * Reserved
|
| */
|
| - cur++;
|
| - if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
|
| - ((cur[0] >= 'a') && (cur[0] <= 'z')))
|
| - cur++;
|
| - else
|
| - return (0);
|
| - } else
|
| - return (0);
|
| - while (cur[0] != 0) { /* non input consuming */
|
| - if (cur[0] != '-')
|
| - return (0);
|
| - cur++;
|
| - if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
|
| - ((cur[0] >= 'a') && (cur[0] <= 'z')))
|
| - cur++;
|
| - else
|
| - return (0);
|
| - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
|
| - ((cur[0] >= 'a') && (cur[0] <= 'z')))
|
| - cur++;
|
| + if ((nxt - cur > 8) || (nxt[0] != 0))
|
| + return(0);
|
| + return(1);
|
| }
|
| + if (nxt - cur < 2)
|
| + return(0);
|
| + /* we got an ISO 639 code */
|
| + if (nxt[0] == 0)
|
| + return(1);
|
| + if (nxt[0] != '-')
|
| + return(0);
|
| +
|
| + nxt++;
|
| + cur = nxt;
|
| + /* now we can have extlang or script or region or variant */
|
| + if ((nxt[0] >= '0') && (nxt[0] <= '9'))
|
| + goto region_m49;
|
| +
|
| + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
|
| + ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
|
| + nxt++;
|
| + if (nxt - cur == 4)
|
| + goto script;
|
| + if (nxt - cur == 2)
|
| + goto region;
|
| + if ((nxt - cur >= 5) && (nxt - cur <= 8))
|
| + goto variant;
|
| + if (nxt - cur != 3)
|
| + return(0);
|
| + /* we parsed an extlang */
|
| + if (nxt[0] == 0)
|
| + return(1);
|
| + if (nxt[0] != '-')
|
| + return(0);
|
| +
|
| + nxt++;
|
| + cur = nxt;
|
| + /* now we can have script or region or variant */
|
| + if ((nxt[0] >= '0') && (nxt[0] <= '9'))
|
| + goto region_m49;
|
| +
|
| + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
|
| + ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
|
| + nxt++;
|
| + if (nxt - cur == 2)
|
| + goto region;
|
| + if ((nxt - cur >= 5) && (nxt - cur <= 8))
|
| + goto variant;
|
| + if (nxt - cur != 4)
|
| + return(0);
|
| + /* we parsed a script */
|
| +script:
|
| + if (nxt[0] == 0)
|
| + return(1);
|
| + if (nxt[0] != '-')
|
| + return(0);
|
| +
|
| + nxt++;
|
| + cur = nxt;
|
| + /* now we can have region or variant */
|
| + if ((nxt[0] >= '0') && (nxt[0] <= '9'))
|
| + goto region_m49;
|
| +
|
| + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
|
| + ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
|
| + nxt++;
|
| +
|
| + if ((nxt - cur >= 5) && (nxt - cur <= 8))
|
| + goto variant;
|
| + if (nxt - cur != 2)
|
| + return(0);
|
| + /* we parsed a region */
|
| +region:
|
| + if (nxt[0] == 0)
|
| + return(1);
|
| + if (nxt[0] != '-')
|
| + return(0);
|
| +
|
| + nxt++;
|
| + cur = nxt;
|
| + /* now we can just have a variant */
|
| + while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
|
| + ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
|
| + nxt++;
|
| +
|
| + if ((nxt - cur < 5) || (nxt - cur > 8))
|
| + return(0);
|
| +
|
| + /* we parsed a variant */
|
| +variant:
|
| + if (nxt[0] == 0)
|
| + return(1);
|
| + if (nxt[0] != '-')
|
| + return(0);
|
| + /* extensions and private use subtags not checked */
|
| return (1);
|
| +
|
| +region_m49:
|
| + if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
|
| + ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
|
| + nxt += 3;
|
| + goto region;
|
| + }
|
| + return(0);
|
| }
|
|
|
| /************************************************************************
|
| @@ -1385,7 +1582,7 @@ nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
|
| {
|
| if (ctxt->options & XML_PARSE_NSCLEAN) {
|
| int i;
|
| - for (i = 0;i < ctxt->nsNr;i += 2) {
|
| + for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
|
| if (ctxt->nsTab[i] == prefix) {
|
| /* in scope */
|
| if (ctxt->nsTab[i + 1] == URL)
|
| @@ -1697,15 +1894,14 @@ namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
|
|
|
| if (ctxt->nameNr >= ctxt->nameMax) {
|
| const xmlChar * *tmp;
|
| - ctxt->nameMax *= 2;
|
| tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
|
| - ctxt->nameMax *
|
| + ctxt->nameMax * 2 *
|
| sizeof(ctxt->nameTab[0]));
|
| if (tmp == NULL) {
|
| - ctxt->nameMax /= 2;
|
| goto mem_error;
|
| }
|
| ctxt->nameTab = tmp;
|
| + ctxt->nameMax *= 2;
|
| }
|
| ctxt->nameTab[ctxt->nameNr] = value;
|
| ctxt->name = value;
|
| @@ -1791,7 +1987,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
|
| * to compare on ASCII based substring.
|
| * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
|
| * strings without newlines within the parser.
|
| - * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
|
| + * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
|
| * defined char within the parser.
|
| * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
|
| *
|
| @@ -1840,10 +2036,10 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
|
| #define SKIPL(val) do { \
|
| int skipl; \
|
| for(skipl=0; skipl<val; skipl++) { \
|
| - if (*(ctxt->input->cur) == '\n') { \
|
| + if (*(ctxt->input->cur) == '\n') { \
|
| ctxt->input->line++; ctxt->input->col = 1; \
|
| - } else ctxt->input->col++; \
|
| - ctxt->nbChars++; \
|
| + } else ctxt->input->col++; \
|
| + ctxt->nbChars++; \
|
| ctxt->input->cur++; \
|
| } \
|
| if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
|
| @@ -1869,6 +2065,16 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
|
| xmlGROW (ctxt);
|
|
|
| static void xmlGROW (xmlParserCtxtPtr ctxt) {
|
| + unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
|
| + unsigned long curBase = ctxt->input->cur - ctxt->input->base;
|
| +
|
| + if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
|
| + (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
|
| + ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
|
| + ctxt->instate = XML_PARSER_EOF;
|
| + }
|
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
|
| if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
|
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
|
| @@ -1929,6 +2135,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
|
| while (IS_BLANK_CH(*cur)) {
|
| if (*cur == '\n') {
|
| ctxt->input->line++; ctxt->input->col = 1;
|
| + } else {
|
| + ctxt->input->col++;
|
| }
|
| cur++;
|
| res++;
|
| @@ -2030,7 +2238,7 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
|
| *
|
| * [ WFC: Legal Character ]
|
| * Characters referred to using character references must match the
|
| - * production for Char.
|
| + * production for Char.
|
| *
|
| * Returns the value parsed (as an int), 0 in case of error
|
| */
|
| @@ -2054,7 +2262,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| return(0);
|
| }
|
| - if ((RAW >= '0') && (RAW <= '9'))
|
| + if ((RAW >= '0') && (RAW <= '9'))
|
| val = val * 16 + (CUR - '0');
|
| else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
|
| val = val * 16 + (CUR - 'a') + 10;
|
| @@ -2087,7 +2295,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| return(0);
|
| }
|
| - if ((RAW >= '0') && (RAW <= '9'))
|
| + if ((RAW >= '0') && (RAW <= '9'))
|
| val = val * 10 + (CUR - '0');
|
| else {
|
| xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
|
| @@ -2113,7 +2321,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
|
| /*
|
| * [ WFC: Legal Character ]
|
| * Characters referred to using character references must match the
|
| - * production for Char.
|
| + * production for Char.
|
| */
|
| if ((IS_CHAR(val) && (outofrange == 0))) {
|
| return(val);
|
| @@ -2138,7 +2346,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
|
| *
|
| * [ WFC: Legal Character ]
|
| * Characters referred to using character references must match the
|
| - * production for Char.
|
| + * production for Char.
|
| *
|
| * Returns the value parsed (as an int), 0 in case of error, str will be
|
| * updated to the current value of the index
|
| @@ -2157,7 +2365,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
|
| ptr += 3;
|
| cur = *ptr;
|
| while (cur != ';') { /* Non input consuming loop */
|
| - if ((cur >= '0') && (cur <= '9'))
|
| + if ((cur >= '0') && (cur <= '9'))
|
| val = val * 16 + (cur - '0');
|
| else if ((cur >= 'a') && (cur <= 'f'))
|
| val = val * 16 + (cur - 'a') + 10;
|
| @@ -2180,7 +2388,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
|
| ptr += 2;
|
| cur = *ptr;
|
| while (cur != ';') { /* Non input consuming loops */
|
| - if ((cur >= '0') && (cur <= '9'))
|
| + if ((cur >= '0') && (cur <= '9'))
|
| val = val * 10 + (cur - '0');
|
| else {
|
| xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
|
| @@ -2204,7 +2412,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
|
| /*
|
| * [ WFC: Legal Character ]
|
| * Characters referred to using character references must match the
|
| - * production for Char.
|
| + * production for Char.
|
| */
|
| if ((IS_CHAR(val) && (outofrange == 0))) {
|
| return(val);
|
| @@ -2226,9 +2434,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
|
| *
|
| * Returns the new input stream or NULL
|
| */
|
| -
|
| +
|
| static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
|
| -
|
| +
|
| static xmlParserInputPtr
|
| xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
|
| xmlParserInputPtr input;
|
| @@ -2251,7 +2459,7 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
|
| if (buffer == NULL) {
|
| xmlErrMemory(ctxt, NULL);
|
| xmlFree(input);
|
| - return(NULL);
|
| + return(NULL);
|
| }
|
| buffer [0] = ' ';
|
| buffer [1] = '%';
|
| @@ -2270,12 +2478,12 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
|
| /**
|
| * xmlParserHandlePEReference:
|
| * @ctxt: the parser context
|
| - *
|
| + *
|
| * [69] PEReference ::= '%' Name ';'
|
| *
|
| * [ WFC: No Recursion ]
|
| * A parsed entity must not contain a recursive
|
| - * reference to itself, either directly or indirectly.
|
| + * reference to itself, either directly or indirectly.
|
| *
|
| * [ WFC: Entity Declared ]
|
| * In a document without any DTD, a document with only an internal DTD
|
| @@ -2293,9 +2501,9 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
|
| * NOTE: misleading but this is handled.
|
| *
|
| * A PEReference may have been detected in the current input stream
|
| - * the handling is done accordingly to
|
| + * the handling is done accordingly to
|
| * http://www.w3.org/TR/REC-xml#entproc
|
| - * i.e.
|
| + * i.e.
|
| * - Included in literal in entity values
|
| * - Included as Parameter Entity reference within DTDs
|
| */
|
| @@ -2375,7 +2583,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| return;
|
| if (entity == NULL) {
|
| -
|
| +
|
| /*
|
| * [ WFC: Entity Declared ]
|
| * In a document without any DTD, a document with only an
|
| @@ -2401,12 +2609,13 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
| xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
|
| "PEReference: %%%s; not found\n",
|
| name, NULL);
|
| - } else
|
| + } else
|
| xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
|
| "PEReference: %%%s; not found\n",
|
| name, NULL);
|
| ctxt->valid = 0;
|
| }
|
| + xmlParserEntityCheck(ctxt, 0, NULL, 0);
|
| } else if (ctxt->input->free != deallocblankswrapper) {
|
| input = xmlNewBlanksWrapperInputStream(ctxt, entity);
|
| if (xmlPushInput(ctxt, input) < 0)
|
| @@ -2418,6 +2627,23 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
| xmlCharEncoding enc;
|
|
|
| /*
|
| + * Note: external parameter entities will not be loaded, it
|
| + * is not required for a non-validating parser, unless the
|
| + * option of validating, or substituting entities were
|
| + * given. Doing so is far more secure as the parser will
|
| + * only process data coming from the document entity by
|
| + * default.
|
| + */
|
| + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
|
| + ((ctxt->options & XML_PARSE_NOENT) == 0) &&
|
| + ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
|
| + ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
|
| + ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
|
| + (ctxt->replaceEntities == 0) &&
|
| + (ctxt->validate == 0))
|
| + return;
|
| +
|
| + /*
|
| * handle the extra spaces added before and after
|
| * c.f. http://www.w3.org/TR/REC-xml#as-PE
|
| * this is done independently.
|
| @@ -2426,7 +2652,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
| if (xmlPushInput(ctxt, input) < 0)
|
| return;
|
|
|
| - /*
|
| + /*
|
| * Get the 4 first bytes and decode the charset
|
| * if enc != XML_CHAR_ENCODING_NONE
|
| * plug some encoding conversion routines.
|
| @@ -2468,15 +2694,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
|
|
| /*
|
| * Macro used to grow the current buffer.
|
| + * buffer##_size is expected to be a size_t
|
| + * the mem_error label is expected to handle memory allocation failures
|
| */
|
| #define growBuffer(buffer, n) { \
|
| xmlChar *tmp; \
|
| - buffer##_size *= 2; \
|
| - buffer##_size += n; \
|
| - tmp = (xmlChar *) \
|
| - xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
|
| + size_t new_size = buffer##_size * 2 + n; \
|
| + if (new_size < buffer##_size) goto mem_error; \
|
| + tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
|
| if (tmp == NULL) goto mem_error; \
|
| buffer = tmp; \
|
| + buffer##_size = new_size; \
|
| }
|
|
|
| /**
|
| @@ -2488,7 +2716,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
| * @end: an end marker xmlChar, 0 if none
|
| * @end2: an end marker xmlChar, 0 if none
|
| * @end3: an end marker xmlChar, 0 if none
|
| - *
|
| + *
|
| * Takes a entity string content and process to do the adequate substitutions.
|
| *
|
| * [67] Reference ::= EntityRef | CharRef
|
| @@ -2502,14 +2730,14 @@ xmlChar *
|
| xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| int what, xmlChar end, xmlChar end2, xmlChar end3) {
|
| xmlChar *buffer = NULL;
|
| - int buffer_size = 0;
|
| + size_t buffer_size = 0;
|
| + size_t nbchars = 0;
|
|
|
| xmlChar *current = NULL;
|
| xmlChar *rep = NULL;
|
| const xmlChar *last;
|
| xmlEntityPtr ent;
|
| int c,l;
|
| - int nbchars = 0;
|
|
|
| if ((ctxt == NULL) || (str == NULL) || (len < 0))
|
| return(NULL);
|
| @@ -2526,7 +2754,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| * allocate a translation buffer.
|
| */
|
| buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
|
| - buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
|
| + buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
|
| if (buffer == NULL) goto mem_error;
|
|
|
| /*
|
| @@ -2546,7 +2774,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| if (val != 0) {
|
| COPY_BUF(0,buffer,nbchars,val);
|
| }
|
| - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
|
| + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
|
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
|
| }
|
| } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
|
| @@ -2558,13 +2786,14 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
|
| (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
|
| goto int_error;
|
| + xmlParserEntityCheck(ctxt, 0, ent, 0);
|
| if (ent != NULL)
|
| - ctxt->nbentities += ent->checked;
|
| + ctxt->nbentities += ent->checked / 2;
|
| if ((ent != NULL) &&
|
| (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
|
| if (ent->content != NULL) {
|
| COPY_BUF(0,buffer,nbchars,ent->content[0]);
|
| - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
|
| + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
|
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
|
| }
|
| } else {
|
| @@ -2581,9 +2810,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| current = rep;
|
| while (*current != 0) { /* non input consuming loop */
|
| buffer[nbchars++] = *current++;
|
| - if (nbchars >
|
| - buffer_size - XML_PARSER_BUFFER_SIZE) {
|
| - if (xmlParserEntityCheck(ctxt, nbchars, ent))
|
| + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
|
| + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
|
| goto int_error;
|
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
|
| }
|
| @@ -2596,7 +2824,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| const xmlChar *cur = ent->name;
|
|
|
| buffer[nbchars++] = '&';
|
| - if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
|
| + if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
|
| growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
|
| }
|
| for (;i > 0;i--)
|
| @@ -2610,8 +2838,9 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| ent = xmlParseStringPEReference(ctxt, &str);
|
| if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
|
| goto int_error;
|
| + xmlParserEntityCheck(ctxt, 0, ent, 0);
|
| if (ent != NULL)
|
| - ctxt->nbentities += ent->checked;
|
| + ctxt->nbentities += ent->checked / 2;
|
| if (ent != NULL) {
|
| if (ent->content == NULL) {
|
| xmlLoadEntityContent(ctxt, ent);
|
| @@ -2624,9 +2853,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| current = rep;
|
| while (*current != 0) { /* non input consuming loop */
|
| buffer[nbchars++] = *current++;
|
| - if (nbchars >
|
| - buffer_size - XML_PARSER_BUFFER_SIZE) {
|
| - if (xmlParserEntityCheck(ctxt, nbchars, ent))
|
| + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
|
| + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
|
| goto int_error;
|
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
|
| }
|
| @@ -2638,8 +2866,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
| } else {
|
| COPY_BUF(l,buffer,nbchars,c);
|
| str += l;
|
| - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
|
| - growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
|
| + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
|
| + growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
|
| }
|
| }
|
| if (str < last)
|
| @@ -2668,7 +2896,7 @@ int_error:
|
| * @end: an end marker xmlChar, 0 if none
|
| * @end2: an end marker xmlChar, 0 if none
|
| * @end3: an end marker xmlChar, 0 if none
|
| - *
|
| + *
|
| * Takes a entity string content and process to do the adequate substitutions.
|
| *
|
| * [67] Reference ::= EntityRef | CharRef
|
| @@ -3031,7 +3259,7 @@ xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
|
| } else {
|
| if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
|
| (c == '.') || (c == '-') ||
|
| - (c == '_') || (c == ':') ||
|
| + (c == '_') || (c == ':') ||
|
| (IS_COMBINING(c)) ||
|
| (IS_EXTENDER(c)))
|
| return(1);
|
| @@ -3057,7 +3285,7 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
|
| */
|
| GROW;
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| - return(NULL);
|
| + return(NULL);
|
| c = CUR_CHAR(l);
|
| if ((ctxt->options & XML_PARSE_OLD10) == 0) {
|
| /*
|
| @@ -3106,7 +3334,7 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
|
| ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
|
| ((c >= 0x10000) && (c <= 0xEFFFF))
|
| )) {
|
| - if (count++ > 100) {
|
| + if (count++ > XML_PARSER_CHUNK_SIZE) {
|
| count = 0;
|
| GROW;
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| @@ -3129,10 +3357,10 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
|
| while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
|
| ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
|
| (c == '.') || (c == '-') ||
|
| - (c == '_') || (c == ':') ||
|
| + (c == '_') || (c == ':') ||
|
| (IS_COMBINING(c)) ||
|
| (IS_EXTENDER(c)))) {
|
| - if (count++ > 100) {
|
| + if (count++ > XML_PARSER_CHUNK_SIZE) {
|
| count = 0;
|
| GROW;
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| @@ -3141,8 +3369,20 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
|
| len += l;
|
| NEXTL(l);
|
| c = CUR_CHAR(l);
|
| + if (c == 0) {
|
| + count = 0;
|
| + GROW;
|
| + if (ctxt->instate == XML_PARSER_EOF)
|
| + return(NULL);
|
| + c = CUR_CHAR(l);
|
| + }
|
| }
|
| }
|
| + if ((len > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
|
| + return(NULL);
|
| + }
|
| if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
|
| return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
|
| return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
|
| @@ -3192,6 +3432,11 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
|
| in++;
|
| if ((*in > 0) && (*in < 0x80)) {
|
| count = in - ctxt->input->cur;
|
| + if ((count > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
|
| + return(NULL);
|
| + }
|
| ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
|
| ctxt->input->cur = in;
|
| ctxt->nbChars += count;
|
| @@ -3210,6 +3455,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
|
| int len = 0, l;
|
| int c;
|
| int count = 0;
|
| + const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
|
|
|
| #ifdef DEBUG
|
| nbParseNCNameComplex++;
|
| @@ -3219,6 +3465,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
|
| * Handler for more complex cases
|
| */
|
| GROW;
|
| + end = ctxt->input->cur;
|
| c = CUR_CHAR(l);
|
| if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
|
| (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
|
| @@ -3227,7 +3474,12 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
|
|
|
| while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
|
| (xmlIsNameChar(ctxt, c) && (c != ':'))) {
|
| - if (count++ > 100) {
|
| + if (count++ > XML_PARSER_CHUNK_SIZE) {
|
| + if ((len > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
|
| + return(NULL);
|
| + }
|
| count = 0;
|
| GROW;
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| @@ -3235,15 +3487,29 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
|
| }
|
| len += l;
|
| NEXTL(l);
|
| + end = ctxt->input->cur;
|
| c = CUR_CHAR(l);
|
| + if (c == 0) {
|
| + count = 0;
|
| + GROW;
|
| + if (ctxt->instate == XML_PARSER_EOF)
|
| + return(NULL);
|
| + end = ctxt->input->cur;
|
| + c = CUR_CHAR(l);
|
| + }
|
| }
|
| - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
|
| + if ((len > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
|
| + return(NULL);
|
| + }
|
| + return(xmlDictLookup(ctxt->dict, end - len, len));
|
| }
|
|
|
| /**
|
| * xmlParseNCName:
|
| * @ctxt: an XML parser context
|
| - * @len: lenght of the string parsed
|
| + * @len: length of the string parsed
|
| *
|
| * parse an XML name.
|
| *
|
| @@ -3281,6 +3547,11 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) {
|
| in++;
|
| if ((*in > 0) && (*in < 0x80)) {
|
| count = in - ctxt->input->cur;
|
| + if ((count > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
|
| + return(NULL);
|
| + }
|
| ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
|
| ctxt->input->cur = in;
|
| ctxt->nbChars += count;
|
| @@ -3349,7 +3620,7 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
|
| *
|
| * [6] Names ::= Name (#x20 Name)*
|
| *
|
| - * Returns the Name parsed or NULL. The @str pointer
|
| + * Returns the Name parsed or NULL. The @str pointer
|
| * is updated to the current location in the string.
|
| */
|
|
|
| @@ -3393,6 +3664,13 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
|
| while (xmlIsNameChar(ctxt, c)) {
|
| if (len + 10 > max) {
|
| xmlChar *tmp;
|
| +
|
| + if ((len > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
|
| + xmlFree(buffer);
|
| + return(NULL);
|
| + }
|
| max *= 2;
|
| tmp = (xmlChar *) xmlRealloc(buffer,
|
| max * sizeof(xmlChar));
|
| @@ -3412,6 +3690,11 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
|
| return(buffer);
|
| }
|
| }
|
| + if ((len > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
|
| + return(NULL);
|
| + }
|
| *str = cur;
|
| return(xmlStrndup(buf, len));
|
| }
|
| @@ -3446,13 +3729,20 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
|
| c = CUR_CHAR(l);
|
|
|
| while (xmlIsNameChar(ctxt, c)) {
|
| - if (count++ > 100) {
|
| + if (count++ > XML_PARSER_CHUNK_SIZE) {
|
| count = 0;
|
| GROW;
|
| }
|
| COPY_BUF(l,buf,len,c);
|
| NEXTL(l);
|
| c = CUR_CHAR(l);
|
| + if (c == 0) {
|
| + count = 0;
|
| + GROW;
|
| + if (ctxt->instate == XML_PARSER_EOF)
|
| + return(NULL);
|
| + c = CUR_CHAR(l);
|
| + }
|
| if (len >= XML_MAX_NAMELEN) {
|
| /*
|
| * Okay someone managed to make a huge token, so he's ready to pay
|
| @@ -3468,7 +3758,7 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
|
| }
|
| memcpy(buffer, buf, len);
|
| while (xmlIsNameChar(ctxt, c)) {
|
| - if (count++ > 100) {
|
| + if (count++ > XML_PARSER_CHUNK_SIZE) {
|
| count = 0;
|
| GROW;
|
| if (ctxt->instate == XML_PARSER_EOF) {
|
| @@ -3479,6 +3769,12 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
|
| if (len + 10 > max) {
|
| xmlChar *tmp;
|
|
|
| + if ((max > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
|
| + xmlFree(buffer);
|
| + return(NULL);
|
| + }
|
| max *= 2;
|
| tmp = (xmlChar *) xmlRealloc(buffer,
|
| max * sizeof(xmlChar));
|
| @@ -3499,6 +3795,11 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
|
| }
|
| if (len == 0)
|
| return(NULL);
|
| + if ((len > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
|
| + return(NULL);
|
| + }
|
| return(xmlStrndup(buf, len));
|
| }
|
|
|
| @@ -3556,7 +3857,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
|
| * When a parameter entity reference appears in a literal entity
|
| * value, ... a single or double quote character in the replacement
|
| * text is always treated as a normal data character and will not
|
| - * terminate the literal.
|
| + * terminate the literal.
|
| * In practice it means we stop the loop only when back at parsing
|
| * the initial entity and the quote is found
|
| */
|
| @@ -3667,8 +3968,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| xmlChar limit = 0;
|
| xmlChar *buf = NULL;
|
| xmlChar *rep = NULL;
|
| - int len = 0;
|
| - int buf_size = 0;
|
| + size_t len = 0;
|
| + size_t buf_size = 0;
|
| int c, l, in_space = 0;
|
| xmlChar *current = NULL;
|
| xmlEntityPtr ent;
|
| @@ -3690,7 +3991,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| * allocate a translation buffer.
|
| */
|
| buf_size = XML_PARSER_BUFFER_SIZE;
|
| - buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
|
| + buf = (xmlChar *) xmlMallocAtomic(buf_size);
|
| if (buf == NULL) goto mem_error;
|
|
|
| /*
|
| @@ -3700,6 +4001,16 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| while (((NXT(0) != limit) && /* checked */
|
| (IS_CHAR(c)) && (c != '<')) &&
|
| (ctxt->instate != XML_PARSER_EOF)) {
|
| + /*
|
| + * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
|
| + * special option is given
|
| + */
|
| + if ((len > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + goto mem_error;
|
| + }
|
| if (c == 0) break;
|
| if (c == '&') {
|
| in_space = 0;
|
| @@ -3708,7 +4019,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
|
|
| if (val == '&') {
|
| if (ctxt->replaceEntities) {
|
| - if (len > buf_size - 10) {
|
| + if (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| buf[len++] = '&';
|
| @@ -3717,7 +4028,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| * The reparsing will be done in xmlStringGetNodeList()
|
| * called by the attribute() function in SAX.c
|
| */
|
| - if (len > buf_size - 10) {
|
| + if (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| buf[len++] = '&';
|
| @@ -3727,7 +4038,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| buf[len++] = ';';
|
| }
|
| } else if (val != 0) {
|
| - if (len > buf_size - 10) {
|
| + if (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| len += xmlCopyChar(0, &buf[len], val);
|
| @@ -3739,7 +4050,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| ctxt->nbentities += ent->owner;
|
| if ((ent != NULL) &&
|
| (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
|
| - if (len > buf_size - 10) {
|
| + if (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| if ((ctxt->replaceEntities == 0) &&
|
| @@ -3752,7 +4063,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| } else {
|
| buf[len++] = ent->content[0];
|
| }
|
| - } else if ((ent != NULL) &&
|
| + } else if ((ent != NULL) &&
|
| (ctxt->replaceEntities != 0)) {
|
| if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
|
| rep = xmlStringDecodeEntities(ctxt, ent->content,
|
| @@ -3767,7 +4078,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| current++;
|
| } else
|
| buf[len++] = *current++;
|
| - if (len > buf_size - 10) {
|
| + if (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| }
|
| @@ -3775,7 +4086,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| rep = NULL;
|
| }
|
| } else {
|
| - if (len > buf_size - 10) {
|
| + if (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| if (ent->content != NULL)
|
| @@ -3790,10 +4101,16 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| * entities problems
|
| */
|
| if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
|
| - (ent->content != NULL)) {
|
| + (ent->content != NULL) && (ent->checked == 0)) {
|
| + unsigned long oldnbent = ctxt->nbentities;
|
| +
|
| rep = xmlStringDecodeEntities(ctxt, ent->content,
|
| XML_SUBSTITUTE_REF, 0, 0, 0);
|
| +
|
| + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
|
| if (rep != NULL) {
|
| + if (xmlStrchr(rep, '<'))
|
| + ent->checked |= 1;
|
| xmlFree(rep);
|
| rep = NULL;
|
| }
|
| @@ -3803,7 +4120,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| * Just output the reference
|
| */
|
| buf[len++] = '&';
|
| - while (len > buf_size - i - 10) {
|
| + while (len + i + 10 > buf_size) {
|
| growBuffer(buf, i + 10);
|
| }
|
| for (;i > 0;i--)
|
| @@ -3816,7 +4133,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| if ((len != 0) || (!normalize)) {
|
| if ((!normalize) || (!in_space)) {
|
| COPY_BUF(l,buf,len,0x20);
|
| - while (len > buf_size - 10) {
|
| + while (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| }
|
| @@ -3825,7 +4142,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| } else {
|
| in_space = 0;
|
| COPY_BUF(l,buf,len,c);
|
| - if (len > buf_size - 10) {
|
| + if (len + 10 > buf_size) {
|
| growBuffer(buf, 10);
|
| }
|
| }
|
| @@ -3853,7 +4170,18 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
|
| }
|
| } else
|
| NEXT;
|
| - if (attlen != NULL) *attlen = len;
|
| +
|
| + /*
|
| + * There we potentially risk an overflow, don't allow attribute value of
|
| + * length more than INT_MAX; it is a very reasonable assumption!
|
| + */
|
| + if (len >= INT_MAX) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + goto mem_error;
|
| + }
|
| +
|
| + if (attlen != NULL) *attlen = (int) len;
|
| return(buf);
|
|
|
| mem_error:
|
| @@ -3879,20 +4207,20 @@ error:
|
| *
|
| * 3.3.3 Attribute-Value Normalization:
|
| * Before the value of an attribute is passed to the application or
|
| - * checked for validity, the XML processor must normalize it as follows:
|
| + * checked for validity, the XML processor must normalize it as follows:
|
| * - a character reference is processed by appending the referenced
|
| * character to the attribute value
|
| * - an entity reference is processed by recursively processing the
|
| - * replacement text of the entity
|
| + * replacement text of the entity
|
| * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
|
| * appending #x20 to the normalized value, except that only a single
|
| * #x20 is appended for a "#xD#xA" sequence that is part of an external
|
| - * parsed entity or the literal entity value of an internal parsed entity
|
| - * - other characters are processed by appending them to the normalized value
|
| + * parsed entity or the literal entity value of an internal parsed entity
|
| + * - other characters are processed by appending them to the normalized value
|
| * If the declared value is not CDATA, then the XML processor must further
|
| * process the normalized attribute value by discarding any leading and
|
| * trailing space (#x20) characters, and by replacing sequences of space
|
| - * (#x20) characters by a single space (#x20) character.
|
| + * (#x20) characters by a single space (#x20) character.
|
| * All attributes for which no declaration has been read should be treated
|
| * by a non-validating parser as if declared CDATA.
|
| *
|
| @@ -3909,7 +4237,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) {
|
| /**
|
| * xmlParseSystemLiteral:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse an XML Literal
|
| *
|
| * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
|
| @@ -3938,7 +4266,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
|
| xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
|
| return(NULL);
|
| }
|
| -
|
| +
|
| buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
|
| if (buf == NULL) {
|
| xmlErrMemory(ctxt, NULL);
|
| @@ -3950,6 +4278,13 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
|
| if (len + 5 >= size) {
|
| xmlChar *tmp;
|
|
|
| + if ((size > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
|
| + xmlFree(buf);
|
| + ctxt->instate = (xmlParserInputState) state;
|
| + return(NULL);
|
| + }
|
| size *= 2;
|
| tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
|
| if (tmp == NULL) {
|
| @@ -4031,6 +4366,12 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
|
| if (len + 1 >= size) {
|
| xmlChar *tmp;
|
|
|
| + if ((size > XML_MAX_NAME_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
|
| + xmlFree(buf);
|
| + return(NULL);
|
| + }
|
| size *= 2;
|
| tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
|
| if (tmp == NULL) {
|
| @@ -4119,7 +4460,7 @@ static const unsigned char test_char_data[256] = {
|
| * The right angle bracket (>) may be represented using the string ">",
|
| * and must, for compatibility, be escaped using ">" or a character
|
| * reference when it appears in the string "]]>" in content, when that
|
| - * string is not marking the end of a CDATA section.
|
| + * string is not marking the end of a CDATA section.
|
| *
|
| * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
|
| */
|
| @@ -4286,7 +4627,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
|
| GROW;
|
| cur = CUR_CHAR(l);
|
| while ((cur != '<') && /* checked */
|
| - (cur != '&') &&
|
| + (cur != '&') &&
|
| (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
|
| if ((cur == ']') && (NXT(1) == ']') &&
|
| (NXT(2) == '>')) {
|
| @@ -4419,7 +4760,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
|
| }
|
| } else {
|
| /*
|
| - * We handle [83] so we return immediately, if
|
| + * We handle [83] so we return immediately, if
|
| * "S SystemLiteral" is not detected. From a purely parsing
|
| * point of view that's a nice mess.
|
| */
|
| @@ -4428,7 +4769,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
|
|
|
| ptr = CUR_PTR;
|
| if (!IS_BLANK_CH(*ptr)) return(NULL);
|
| -
|
| +
|
| while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
|
| if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
|
| }
|
| @@ -4456,11 +4797,12 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
|
| * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
|
| */
|
| static void
|
| -xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
|
| +xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
|
| + size_t len, size_t size) {
|
| int q, ql;
|
| int r, rl;
|
| int cur, l;
|
| - int count = 0;
|
| + size_t count = 0;
|
| int inputid;
|
|
|
| inputid = ctxt->input->id;
|
| @@ -4506,16 +4848,26 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
|
| if ((r == '-') && (q == '-')) {
|
| xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
|
| }
|
| + if ((len > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
| + "Comment too big found", NULL);
|
| + xmlFree (buf);
|
| + return;
|
| + }
|
| if (len + 5 >= size) {
|
| xmlChar *new_buf;
|
| - size *= 2;
|
| - new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
|
| + size_t new_size;
|
| +
|
| + new_size = size * 2;
|
| + new_buf = (xmlChar *) xmlRealloc(buf, new_size);
|
| if (new_buf == NULL) {
|
| xmlFree (buf);
|
| xmlErrMemory(ctxt, NULL);
|
| return;
|
| }
|
| buf = new_buf;
|
| + size = new_size;
|
| }
|
| COPY_BUF(ql,buf,len,q);
|
| q = r;
|
| @@ -4580,11 +4932,12 @@ not_terminated:
|
| void
|
| xmlParseComment(xmlParserCtxtPtr ctxt) {
|
| xmlChar *buf = NULL;
|
| - int size = XML_PARSER_BUFFER_SIZE;
|
| - int len = 0;
|
| + size_t size = XML_PARSER_BUFFER_SIZE;
|
| + size_t len = 0;
|
| xmlParserInputState state;
|
| const xmlChar *in;
|
| - int nbchar = 0, ccol;
|
| + size_t nbchar = 0;
|
| + int ccol;
|
| int inputid;
|
|
|
| /*
|
| @@ -4664,6 +5017,13 @@ get_more:
|
| buf[len] = 0;
|
| }
|
| }
|
| + if ((len > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
| + "Comment too big found", NULL);
|
| + xmlFree (buf);
|
| + return;
|
| + }
|
| ctxt->input->cur = in;
|
| if (*in == 0xA) {
|
| in++;
|
| @@ -4684,7 +5044,7 @@ get_more:
|
| if (ctxt->instate == XML_PARSER_EOF) {
|
| xmlFree(buf);
|
| return;
|
| - }
|
| + }
|
| in = ctxt->input->cur;
|
| if (*in == '-') {
|
| if (in[1] == '-') {
|
| @@ -4707,13 +5067,14 @@ get_more:
|
| ctxt->instate = state;
|
| return;
|
| }
|
| - if (buf != NULL)
|
| - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
| - "Comment not terminated \n<!--%.50s\n",
|
| + if (buf != NULL) {
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
|
| + "Double hyphen within comment: "
|
| + "<!--%.50s\n",
|
| buf);
|
| - else
|
| - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
| - "Comment not terminated \n", NULL);
|
| + } else
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
|
| + "Double hyphen within comment\n", NULL);
|
| in++;
|
| ctxt->input->col++;
|
| }
|
| @@ -4731,7 +5092,7 @@ get_more:
|
| /**
|
| * xmlParsePITarget:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse the name of a PI
|
| *
|
| * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
|
| @@ -4768,8 +5129,8 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) {
|
| NULL, NULL);
|
| }
|
| if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
|
| - xmlNsErr(ctxt, XML_NS_ERR_COLON,
|
| - "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
|
| + xmlNsErr(ctxt, XML_NS_ERR_COLON,
|
| + "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
|
| }
|
| return(name);
|
| }
|
| @@ -4779,7 +5140,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) {
|
| * xmlParseCatalogPI:
|
| * @ctxt: an XML parser context
|
| * @catalog: the PI value string
|
| - *
|
| + *
|
| * parse an XML Catalog Processing Instruction.
|
| *
|
| * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
|
| @@ -4839,7 +5200,7 @@ error:
|
| /**
|
| * xmlParsePI:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse an XML Processing Instruction.
|
| *
|
| * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
| @@ -4850,8 +5211,8 @@ error:
|
| void
|
| xmlParsePI(xmlParserCtxtPtr ctxt) {
|
| xmlChar *buf = NULL;
|
| - int len = 0;
|
| - int size = XML_PARSER_BUFFER_SIZE;
|
| + size_t len = 0;
|
| + size_t size = XML_PARSER_BUFFER_SIZE;
|
| int cur, l;
|
| const xmlChar *target;
|
| xmlParserInputState state;
|
| @@ -4908,9 +5269,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
|
| ((cur != '?') || (NXT(1) != '>'))) {
|
| if (len + 5 >= size) {
|
| xmlChar *tmp;
|
| -
|
| - size *= 2;
|
| - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
|
| + size_t new_size = size * 2;
|
| + tmp = (xmlChar *) xmlRealloc(buf, new_size);
|
| if (tmp == NULL) {
|
| xmlErrMemory(ctxt, NULL);
|
| xmlFree(buf);
|
| @@ -4918,6 +5278,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
|
| return;
|
| }
|
| buf = tmp;
|
| + size = new_size;
|
| }
|
| count++;
|
| if (count > 50) {
|
| @@ -4927,6 +5288,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
|
| return;
|
| }
|
| count = 0;
|
| + if ((len > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
|
| + "PI %s too big found", target);
|
| + xmlFree(buf);
|
| + ctxt->instate = state;
|
| + return;
|
| + }
|
| }
|
| COPY_BUF(l,buf,len,cur);
|
| NEXTL(l);
|
| @@ -4937,6 +5306,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
|
| cur = CUR_CHAR(l);
|
| }
|
| }
|
| + if ((len > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
|
| + "PI %s too big found", target);
|
| + xmlFree(buf);
|
| + ctxt->instate = state;
|
| + return;
|
| + }
|
| buf[len] = 0;
|
| if (cur != '?') {
|
| xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
|
| @@ -4998,7 +5375,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
|
| const xmlChar *name;
|
| xmlChar *Pubid;
|
| xmlChar *Systemid;
|
| -
|
| +
|
| if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
|
| xmlParserInputPtr input = ctxt->input;
|
| SHRINK;
|
| @@ -5021,8 +5398,8 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
|
| return;
|
| }
|
| if (xmlStrchr(name, ':') != NULL) {
|
| - xmlNsErr(ctxt, XML_NS_ERR_COLON,
|
| - "colon are forbidden from notation names '%s'\n",
|
| + xmlNsErr(ctxt, XML_NS_ERR_COLON,
|
| + "colons are forbidden from notation names '%s'\n",
|
| name, NULL, NULL);
|
| }
|
| SKIP_BLANKS;
|
| @@ -5081,7 +5458,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
|
| int isParameter = 0;
|
| xmlChar *orig = NULL;
|
| int skipped;
|
| -
|
| +
|
| /* GROW; done in the caller */
|
| if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
|
| xmlParserInputPtr input = ctxt->input;
|
| @@ -5110,8 +5487,8 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
|
| return;
|
| }
|
| if (xmlStrchr(name, ':') != NULL) {
|
| - xmlNsErr(ctxt, XML_NS_ERR_COLON,
|
| - "colon are forbidden from entities names '%s'\n",
|
| + xmlNsErr(ctxt, XML_NS_ERR_COLON,
|
| + "colons are forbidden from entities names '%s'\n",
|
| name, NULL, NULL);
|
| }
|
| skipped = SKIP_BLANKS;
|
| @@ -5340,13 +5717,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
|
| *
|
| * [ VC: Fixed Attribute Default ]
|
| * if an attribute has a default value declared with the #FIXED
|
| - * keyword, instances of that attribute must match the default value.
|
| + * keyword, instances of that attribute must match the default value.
|
| *
|
| * [ WFC: No < in Attribute Values ]
|
| * handled in xmlParseAttValue()
|
| *
|
| * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
|
| - * or XML_ATTRIBUTE_FIXED.
|
| + * or XML_ATTRIBUTE_FIXED.
|
| */
|
|
|
| int
|
| @@ -5395,7 +5772,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
|
| *
|
| * [ VC: Notation Attributes ]
|
| * Values of this type must match one of the notation names included
|
| - * in the declaration; all notation names in the declaration must be declared.
|
| + * in the declaration; all notation names in the declaration must be declared.
|
| *
|
| * Returns: the notation attribute tree built while parsing
|
| */
|
| @@ -5595,15 +5972,15 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
|
| * [ VC: Entity Name ]
|
| * Values of type ENTITY must match the Name production, values
|
| * of type ENTITIES must match Names; each Entity Name must match the
|
| - * name of an unparsed entity declared in the DTD.
|
| + * name of an unparsed entity declared in the DTD.
|
| *
|
| * [ VC: Name Token ]
|
| * Values of type NMTOKEN must match the Nmtoken production; values
|
| - * of type NMTOKENS must match Nmtokens.
|
| + * of type NMTOKENS must match Nmtokens.
|
| *
|
| * Returns the attribute type
|
| */
|
| -int
|
| +int
|
| xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
|
| SHRINK;
|
| if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
|
| @@ -5746,7 +6123,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
|
| xmlFreeEnumeration(tree);
|
|
|
| if ((ctxt->sax2) && (defaultValue != NULL) &&
|
| - (def != XML_ATTRIBUTE_IMPLIED) &&
|
| + (def != XML_ATTRIBUTE_IMPLIED) &&
|
| (def != XML_ATTRIBUTE_REQUIRED)) {
|
| xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
|
| }
|
| @@ -5775,7 +6152,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
|
| *
|
| * parse the declaration for a Mixed Element content
|
| * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
|
| - *
|
| + *
|
| * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
|
| * '(' S? '#PCDATA' S? ')'
|
| *
|
| @@ -5783,7 +6160,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
|
| *
|
| * [ VC: No Duplicate Types ]
|
| * The same name must not appear more than once in a single
|
| - * mixed-content declaration.
|
| + * mixed-content declaration.
|
| *
|
| * returns: the list of the xmlElementContentPtr describing the element choices
|
| */
|
| @@ -5883,7 +6260,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
|
| *
|
| * parse the declaration for a Mixed Element content
|
| * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
|
| - *
|
| + *
|
| *
|
| * [47] children ::= (choice | seq) ('?' | '*' | '+')?
|
| *
|
| @@ -5904,7 +6281,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
|
| * be empty, and neither the first nor last non-blank character of
|
| * the replacement text should be a connector (| or ,).
|
| *
|
| - * Returns the tree of xmlElementContentPtr describing the element
|
| + * Returns the tree of xmlElementContentPtr describing the element
|
| * hierarchy.
|
| */
|
| static xmlElementContentPtr
|
| @@ -6217,7 +6594,7 @@ xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
|
| *
|
| * parse the declaration for an Element content either Mixed or Children,
|
| * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
|
| - *
|
| + *
|
| * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
|
| *
|
| * returns: the type of element content XML_ELEMENT_TYPE_xxx
|
| @@ -6345,7 +6722,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
|
| xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
|
| "Element declaration doesn't start and stop in the same entity\n");
|
| }
|
| -
|
| +
|
| NEXT;
|
| if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
| (ctxt->sax->elementDecl != NULL)) {
|
| @@ -6357,7 +6734,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
|
| /*
|
| * this is a trick: if xmlAddElementDecl is called,
|
| * instead of copying the full tree it is plugged directly
|
| - * if called from the parser. Avoid duplicating the
|
| + * if called from the parser. Avoid duplicating the
|
| * interfaces or change the API/ABI
|
| */
|
| xmlFreeDocElementContent(ctxt->myDoc, content);
|
| @@ -6374,8 +6751,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
|
| * xmlParseConditionalSections
|
| * @ctxt: an XML parser context
|
| *
|
| - * [61] conditionalSect ::= includeSect | ignoreSect
|
| - * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
|
| + * [61] conditionalSect ::= includeSect | ignoreSect
|
| + * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
|
| * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
|
| * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
|
| * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
|
| @@ -6527,7 +6904,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
|
| /**
|
| * xmlParseMarkupDecl:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse Markup declarations
|
| *
|
| * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
|
| @@ -6544,7 +6921,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
|
| * In the internal DTD subset, parameter-entity references can occur
|
| * only where markup declarations can occur, not within markup declarations.
|
| * (This does not apply to references that occur in external parameter
|
| - * entities or to the external subset.)
|
| + * entities or to the external subset.)
|
| */
|
| void
|
| xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
|
| @@ -6673,7 +7050,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
| * @ctxt: an XML parser context
|
| * @ExternalID: the external identifier
|
| * @SystemID: the system identifier (or URL)
|
| - *
|
| + *
|
| * parse Markup declarations from an external subset
|
| *
|
| * [30] extSubset ::= textDecl? extSubsetDecl
|
| @@ -6686,7 +7063,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
|
| xmlDetectSAX2(ctxt);
|
| GROW;
|
|
|
| - if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
|
| + if ((ctxt->encoding == NULL) &&
|
| (ctxt->input->end - ctxt->input->cur >= 4)) {
|
| xmlChar start[4];
|
| xmlCharEncoding enc;
|
| @@ -6852,8 +7229,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
| * The first reference to the entity trigger a parsing phase
|
| * where the ent->children is filled with the result from
|
| * the parsing.
|
| - */
|
| - if (ent->checked == 0) {
|
| + * Note: external parsed entities are not loaded unless the
|
| + * validating or entity-substituting parsing option was given;
|
| + * loading them is not required of a non-validating parser.
|
| + * Skipping them is far more secure as the parser will only process
|
| + * data coming from the document entity by default.
|
| + */
|
| + if ((ent->checked == 0) &&
|
| + ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
|
| + (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
|
| unsigned long oldnbent = ctxt->nbentities;
|
|
|
| /*
|
| @@ -6895,13 +7279,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
| * Store the number of entities needing parsing for this entity
|
| * content and do checkings
|
| */
|
| - ent->checked = ctxt->nbentities - oldnbent;
|
| + ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
|
| + if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
|
| + ent->checked |= 1;
|
| if (ret == XML_ERR_ENTITY_LOOP) {
|
| xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
|
| xmlFreeNodeList(list);
|
| return;
|
| }
|
| - if (xmlParserEntityCheck(ctxt, 0, ent)) {
|
| + if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
|
| xmlFreeNodeList(list);
|
| return;
|
| }
|
| @@ -6941,6 +7327,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
| ent->owner = 1;
|
| while (list != NULL) {
|
| list->parent = (xmlNodePtr) ent;
|
| + xmlSetTreeDoc(list, ent->doc);
|
| if (list->next == NULL)
|
| ent->last = list;
|
| list = list->next;
|
| @@ -6954,14 +7341,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
| (ret != XML_WAR_UNDECLARED_ENTITY)) {
|
| xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
|
| "Entity '%s' failed to parse\n", ent->name);
|
| + xmlParserEntityCheck(ctxt, 0, ent, 0);
|
| } else if (list != NULL) {
|
| xmlFreeNodeList(list);
|
| list = NULL;
|
| }
|
| if (ent->checked == 0)
|
| - ent->checked = 1;
|
| + ent->checked = 2;
|
| } else if (ent->checked != 1) {
|
| - ctxt->nbentities += ent->checked;
|
| + ctxt->nbentities += ent->checked / 2;
|
| }
|
|
|
| /*
|
| @@ -7052,14 +7440,19 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
| * Seems we are generating the DOM content, do
|
| * a simple tree copy for all references except the first
|
| * In the first occurrence list contains the replacement.
|
| - * progressive == 2 means we are operating on the Reader
|
| - * and since nodes are discarded we must copy all the time.
|
| */
|
| if (((list == NULL) && (ent->owner == 0)) ||
|
| (ctxt->parseMode == XML_PARSE_READER)) {
|
| xmlNodePtr nw = NULL, cur, firstChild = NULL;
|
|
|
| /*
|
| + * We are copying here, make sure there is no abuse
|
| + */
|
| + ctxt->sizeentcopy += ent->length + 5;
|
| + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
|
| + return;
|
| +
|
| + /*
|
| * when operating on a reader, the entities definitions
|
| * are always owning the entities subtree.
|
| if (ctxt->parseMode == XML_PARSE_READER)
|
| @@ -7096,9 +7489,17 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
| if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
|
| xmlAddEntityReference(ent, firstChild, nw);
|
| #endif /* LIBXML_LEGACY_ENABLED */
|
| - } else if (list == NULL) {
|
| + } else if ((list == NULL) || (ctxt->inputNr > 0)) {
|
| xmlNodePtr nw = NULL, cur, next, last,
|
| firstChild = NULL;
|
| +
|
| + /*
|
| + * We are copying here, make sure there is no abuse
|
| + */
|
| + ctxt->sizeentcopy += ent->length + 5;
|
| + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
|
| + return;
|
| +
|
| /*
|
| * Copy the entity child list and make it the new
|
| * entity child list. The goal is to make sure any
|
| @@ -7215,7 +7616,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
|
| NEXT;
|
|
|
| /*
|
| - * Predefined entites override any extra definition
|
| + * Predefined entities override any extra definition
|
| */
|
| if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
|
| ent = xmlGetPredefinedEntity(name);
|
| @@ -7224,7 +7625,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
|
| }
|
|
|
| /*
|
| - * Increate the number of entity references parsed
|
| + * Increase the number of entity references parsed
|
| */
|
| ctxt->nbentities++;
|
|
|
| @@ -7235,7 +7636,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
|
| if (ctxt->sax != NULL) {
|
| if (ctxt->sax->getEntity != NULL)
|
| ent = ctxt->sax->getEntity(ctxt->userData, name);
|
| - if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
|
| + if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
|
| (ctxt->options & XML_PARSE_OLDSAX))
|
| ent = xmlGetPredefinedEntity(name);
|
| if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
|
| @@ -7281,6 +7682,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
|
| ctxt->sax->reference(ctxt->userData, name);
|
| }
|
| }
|
| + xmlParserEntityCheck(ctxt, 0, ent, 0);
|
| ctxt->valid = 0;
|
| }
|
|
|
| @@ -7308,14 +7710,16 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
|
| * [ WFC: No < in Attribute Values ]
|
| * The replacement text of any entity referred to directly or
|
| * indirectly in an attribute value (other than "<") must
|
| - * not contain a <.
|
| + * not contain a <.
|
| */
|
| else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
|
| - (ent != NULL) && (ent->content != NULL) &&
|
| - (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
|
| - (xmlStrchr(ent->content, '<'))) {
|
| - xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
|
| - "'<' in entity '%s' is not allowed in attributes values\n", name);
|
| + (ent != NULL) &&
|
| + (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
|
| + if (((ent->checked & 1) || (ent->checked == 0)) &&
|
| + (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
|
| + "'<' in entity '%s' is not allowed in attributes values\n", name);
|
| + }
|
| }
|
|
|
| /*
|
| @@ -7337,7 +7741,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
|
| /*
|
| * [ WFC: No Recursion ]
|
| * A parsed entity must not contain a recursive reference
|
| - * to itself, either directly or indirectly.
|
| + * to itself, either directly or indirectly.
|
| * Done somewhere else
|
| */
|
| return(ent);
|
| @@ -7406,7 +7810,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
|
|
|
|
|
| /*
|
| - * Predefined entites override any extra definition
|
| + * Predefined entities override any extra definition
|
| */
|
| if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
|
| ent = xmlGetPredefinedEntity(name);
|
| @@ -7459,7 +7863,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
|
| * is not obligated to read and process their declarations;
|
| * for such documents, the rule that an entity must be
|
| * declared is a well-formedness constraint only if
|
| - * standalone='yes'.
|
| + * standalone='yes'.
|
| */
|
| if (ent == NULL) {
|
| if ((ctxt->standalone == 1) ||
|
| @@ -7472,6 +7876,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
|
| "Entity '%s' not defined\n",
|
| name);
|
| }
|
| + xmlParserEntityCheck(ctxt, 0, ent, 0);
|
| /* TODO ? check regressions ctxt->valid = 0; */
|
| }
|
|
|
| @@ -7550,7 +7955,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
|
| *
|
| * [ WFC: No Recursion ]
|
| * A parsed entity must not contain a recursive
|
| - * reference to itself, either directly or indirectly.
|
| + * reference to itself, either directly or indirectly.
|
| *
|
| * [ WFC: Entity Declared ]
|
| * In a document without any DTD, a document with only an internal DTD
|
| @@ -7631,6 +8036,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
|
| name, NULL);
|
| ctxt->valid = 0;
|
| }
|
| + xmlParserEntityCheck(ctxt, 0, NULL, 0);
|
| } else {
|
| /*
|
| * Internal checking in case the entity quest barfed
|
| @@ -7732,7 +8138,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
|
| while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
|
| (IS_CHAR(c))) {
|
| xmlBufferAdd(buf, ctxt->input->cur, l);
|
| - if (count++ > 100) {
|
| + if (count++ > XML_PARSER_CHUNK_SIZE) {
|
| count = 0;
|
| GROW;
|
| if (ctxt->instate == XML_PARSER_EOF) {
|
| @@ -7742,6 +8148,15 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
|
| }
|
| NEXTL(l);
|
| c = CUR_CHAR(l);
|
| + if (c == 0) {
|
| + count = 0;
|
| + GROW;
|
| + if (ctxt->instate == XML_PARSER_EOF) {
|
| + xmlBufferFree(buf);
|
| + return(-1);
|
| + }
|
| + c = CUR_CHAR(l);
|
| + }
|
| }
|
|
|
| if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
|
| @@ -7861,6 +8276,7 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
|
| name, NULL);
|
| ctxt->valid = 0;
|
| }
|
| + xmlParserEntityCheck(ctxt, 0, NULL, 0);
|
| } else {
|
| /*
|
| * Internal checking in case the entity quest barfed
|
| @@ -7884,12 +8300,12 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
|
| *
|
| * parse a DOCTYPE declaration
|
| *
|
| - * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
|
| + * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
|
| * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
|
| *
|
| * [ VC: Root Element Type ]
|
| * The Name in the document type declaration must match the element
|
| - * type of the root element.
|
| + * type of the root element.
|
| */
|
|
|
| void
|
| @@ -7973,7 +8389,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
|
| ctxt->instate = XML_PARSER_DTD;
|
| NEXT;
|
| /*
|
| - * Parse the succession of Markup declarations and
|
| + * Parse the succession of Markup declarations and
|
| * PEReferences.
|
| * Subsequence (markupdecl | PEReference | S)*
|
| */
|
| @@ -7997,7 +8413,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
|
| break;
|
| }
|
| }
|
| - if (RAW == ']') {
|
| + if (RAW == ']') {
|
| NEXT;
|
| SKIP_BLANKS;
|
| }
|
| @@ -8028,8 +8444,8 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
|
| *
|
| * [ WFC: No < in Attribute Values ]
|
| * The replacement text of any entity referred to directly or indirectly in
|
| - * an attribute value (other than "<") must not contain a <.
|
| - *
|
| + * an attribute value (other than "<") must not contain a <.
|
| + *
|
| * [ VC: Attribute Value Type ]
|
| * The attribute must have been declared; the value must be of the type
|
| * declared for it.
|
| @@ -8110,7 +8526,7 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
|
| /**
|
| * xmlParseStartTag:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse a start of tag either for rule element or
|
| * EmptyElement. In both case we don't parse the tag closing chars.
|
| *
|
| @@ -8118,13 +8534,13 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
|
| *
|
| * [ WFC: Unique Att Spec ]
|
| * No attribute name may appear more than once in the same start-tag or
|
| - * empty-element tag.
|
| + * empty-element tag.
|
| *
|
| * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
|
| *
|
| * [ WFC: Unique Att Spec ]
|
| * No attribute name may appear more than once in the same start-tag or
|
| - * empty-element tag.
|
| + * empty-element tag.
|
| *
|
| * With namespace:
|
| *
|
| @@ -8163,7 +8579,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
|
| SKIP_BLANKS;
|
| GROW;
|
|
|
| - while (((RAW != '>') &&
|
| + while (((RAW != '>') &&
|
| ((RAW != '/') || (NXT(1) != '>')) &&
|
| (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
|
| const xmlChar *q = CUR_PTR;
|
| @@ -8174,7 +8590,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
|
| /*
|
| * [ WFC: Unique Att Spec ]
|
| * No attribute name may appear more than once in the same
|
| - * start-tag or empty-element tag.
|
| + * start-tag or empty-element tag.
|
| */
|
| for (i = 0; i < nbatts;i += 2) {
|
| if (xmlStrEqual(atts[i], attname)) {
|
| @@ -8223,7 +8639,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
|
| xmlFree(attvalue);
|
| }
|
|
|
| -failed:
|
| +failed:
|
|
|
| GROW
|
| if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
|
| @@ -8305,7 +8721,7 @@ xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
|
| /*
|
| * [ WFC: Element Type Match ]
|
| * The Name in an element's end-tag must match the element type in the
|
| - * start-tag.
|
| + * start-tag.
|
| *
|
| */
|
| if (name != (xmlChar*)1) {
|
| @@ -8401,7 +8817,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
|
| if (CUR == ':') {
|
| l = xmlParseName(ctxt);
|
| if (l != NULL) {
|
| - xmlNsErr(ctxt, XML_NS_ERR_QNAME,
|
| + xmlNsErr(ctxt, XML_NS_ERR_QNAME,
|
| "Failed to parse QName '%s'\n", l, NULL, NULL);
|
| *prefix = NULL;
|
| return(l);
|
| @@ -8484,7 +8900,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
|
|
|
| cmp = prefix;
|
| while (*in != 0 && *in == *cmp) {
|
| - ++in;
|
| + ++in;
|
| ++cmp;
|
| }
|
| if ((*cmp == 0) && (*in == ':')) {
|
| @@ -8522,20 +8938,20 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
|
| *
|
| * 3.3.3 Attribute-Value Normalization:
|
| * Before the value of an attribute is passed to the application or
|
| - * checked for validity, the XML processor must normalize it as follows:
|
| + * checked for validity, the XML processor must normalize it as follows:
|
| * - a character reference is processed by appending the referenced
|
| * character to the attribute value
|
| * - an entity reference is processed by recursively processing the
|
| - * replacement text of the entity
|
| + * replacement text of the entity
|
| * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
|
| * appending #x20 to the normalized value, except that only a single
|
| * #x20 is appended for a "#xD#xA" sequence that is part of an external
|
| - * parsed entity or the literal entity value of an internal parsed entity
|
| - * - other characters are processed by appending them to the normalized value
|
| + * parsed entity or the literal entity value of an internal parsed entity
|
| + * - other characters are processed by appending them to the normalized value
|
| * If the declared value is not CDATA, then the XML processor must further
|
| * process the normalized attribute value by discarding any leading and
|
| * trailing space (#x20) characters, and by replacing sequences of space
|
| - * (#x20) characters by a single space (#x20) character.
|
| + * (#x20) characters by a single space (#x20) character.
|
| * All attributes for which no declaration has been read should be treated
|
| * by a non-validating parser as if declared CDATA.
|
| *
|
| @@ -8550,9 +8966,12 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| xmlChar limit = 0;
|
| const xmlChar *in = NULL, *start, *end, *last;
|
| xmlChar *ret = NULL;
|
| + int line, col;
|
|
|
| GROW;
|
| in = (xmlChar *) CUR_PTR;
|
| + line = ctxt->input->line;
|
| + col = ctxt->input->col;
|
| if (*in != '"' && *in != '\'') {
|
| xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
|
| return (NULL);
|
| @@ -8565,6 +8984,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| * pure ASCII.
|
| */
|
| limit = *in++;
|
| + col++;
|
| end = ctxt->input->end;
|
| start = in;
|
| if (in >= end) {
|
| @@ -8581,9 +9001,14 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| /*
|
| * Skip any leading spaces
|
| */
|
| - while ((in < end) && (*in != limit) &&
|
| + while ((in < end) && (*in != limit) &&
|
| ((*in == 0x20) || (*in == 0x9) ||
|
| (*in == 0xA) || (*in == 0xD))) {
|
| + if (*in == 0xA) {
|
| + line++; col = 1;
|
| + } else {
|
| + col++;
|
| + }
|
| in++;
|
| start = in;
|
| if (in >= end) {
|
| @@ -8597,10 +9022,17 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| in = in + delta;
|
| }
|
| end = ctxt->input->end;
|
| + if (((in - start) > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + return(NULL);
|
| + }
|
| }
|
| }
|
| while ((in < end) && (*in != limit) && (*in >= 0x20) &&
|
| (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
|
| + col++;
|
| if ((*in++ == 0x20) && (*in == 0x20)) break;
|
| if (in >= end) {
|
| const xmlChar *oldbase = ctxt->input->base;
|
| @@ -8613,6 +9045,12 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| in = in + delta;
|
| }
|
| end = ctxt->input->end;
|
| + if (((in - start) > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + return(NULL);
|
| + }
|
| }
|
| }
|
| last = in;
|
| @@ -8620,9 +9058,14 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| * skip the trailing blanks
|
| */
|
| while ((last[-1] == 0x20) && (last > start)) last--;
|
| - while ((in < end) && (*in != limit) &&
|
| + while ((in < end) && (*in != limit) &&
|
| ((*in == 0x20) || (*in == 0x9) ||
|
| (*in == 0xA) || (*in == 0xD))) {
|
| + if (*in == 0xA) {
|
| + line++, col = 1;
|
| + } else {
|
| + col++;
|
| + }
|
| in++;
|
| if (in >= end) {
|
| const xmlChar *oldbase = ctxt->input->base;
|
| @@ -8636,13 +9079,26 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| last = last + delta;
|
| }
|
| end = ctxt->input->end;
|
| + if (((in - start) > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + return(NULL);
|
| + }
|
| }
|
| }
|
| + if (((in - start) > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + return(NULL);
|
| + }
|
| if (*in != limit) goto need_complex;
|
| } else {
|
| while ((in < end) && (*in != limit) && (*in >= 0x20) &&
|
| (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
|
| in++;
|
| + col++;
|
| if (in >= end) {
|
| const xmlChar *oldbase = ctxt->input->base;
|
| GROW;
|
| @@ -8654,12 +9110,25 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| in = in + delta;
|
| }
|
| end = ctxt->input->end;
|
| + if (((in - start) > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + return(NULL);
|
| + }
|
| }
|
| }
|
| last = in;
|
| + if (((in - start) > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
|
| + "AttValue length too long\n");
|
| + return(NULL);
|
| + }
|
| if (*in != limit) goto need_complex;
|
| }
|
| in++;
|
| + col++;
|
| if (len != NULL) {
|
| *len = last - start;
|
| ret = (xmlChar *) start;
|
| @@ -8668,6 +9137,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
|
| ret = xmlStrndup(start, last - start);
|
| }
|
| CUR_PTR = in;
|
| + ctxt->input->line = line;
|
| + ctxt->input->col = col;
|
| if (alloc) *alloc = 0;
|
| return ret;
|
| need_complex:
|
| @@ -8795,7 +9266,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
|
| /**
|
| * xmlParseStartTag2:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse a start of tag either for rule element or
|
| * EmptyElement. In both case we don't parse the tag closing chars.
|
| * This routine is called when running SAX2 parsing
|
| @@ -8804,13 +9275,13 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
|
| *
|
| * [ WFC: Unique Att Spec ]
|
| * No attribute name may appear more than once in the same start-tag or
|
| - * empty-element tag.
|
| + * empty-element tag.
|
| *
|
| * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
|
| *
|
| * [ WFC: Unique Att Spec ]
|
| * No attribute name may appear more than once in the same start-tag or
|
| - * empty-element tag.
|
| + * empty-element tag.
|
| *
|
| * With namespace:
|
| *
|
| @@ -8879,7 +9350,7 @@ reparse:
|
| GROW;
|
| if (ctxt->input->base != base) goto base_changed;
|
|
|
| - while (((RAW != '>') &&
|
| + while (((RAW != '>') &&
|
| ((RAW != '/') || (NXT(1) != '>')) &&
|
| (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
|
| const xmlChar *q = CUR_PTR;
|
| @@ -8900,6 +9371,12 @@ reparse:
|
| const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
|
| xmlURIPtr uri;
|
|
|
| + if (URL == NULL) {
|
| + xmlErrMemory(ctxt, "dictionary allocation failure");
|
| + if ((attvalue != NULL) && (alloc != 0))
|
| + xmlFree(attvalue);
|
| + return(NULL);
|
| + }
|
| if (*URL != 0) {
|
| uri = xmlParseURI((const char *) URL);
|
| if (uri == NULL) {
|
| @@ -8943,6 +9420,13 @@ reparse:
|
| if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
|
| skip_default_ns:
|
| if (alloc != 0) xmlFree(attvalue);
|
| + if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
|
| + break;
|
| + if (!IS_BLANK_CH(RAW)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
|
| + "attributes construct error\n");
|
| + break;
|
| + }
|
| SKIP_BLANKS;
|
| continue;
|
| }
|
| @@ -9016,6 +9500,13 @@ skip_default_ns:
|
| if (nsPush(ctxt, attname, URL) > 0) nbNs++;
|
| skip_ns:
|
| if (alloc != 0) xmlFree(attvalue);
|
| + if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
|
| + break;
|
| + if (!IS_BLANK_CH(RAW)) {
|
| + xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
|
| + "attributes construct error\n");
|
| + break;
|
| + }
|
| SKIP_BLANKS;
|
| if (ctxt->input->base != base) goto base_changed;
|
| continue;
|
| @@ -9145,7 +9636,7 @@ failed:
|
| atts[nbatts++] = defaults->values[5 * i + 3];
|
| if ((ctxt->standalone == 1) &&
|
| (defaults->values[5 * i + 4] != NULL)) {
|
| - xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
|
| + xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
|
| "standalone: attribute %s on %s defaulted from external subset\n",
|
| attname, localname);
|
| }
|
| @@ -9175,7 +9666,7 @@ failed:
|
| /*
|
| * [ WFC: Unique Att Spec ]
|
| * No attribute name may appear more than once in the same
|
| - * start-tag or empty-element tag.
|
| + * start-tag or empty-element tag.
|
| * As extended by the Namespace in XML REC.
|
| */
|
| for (j = 0; j < i;j += 5) {
|
| @@ -9276,9 +9767,11 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
|
| if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
|
| if (ctxt->input->cur[tlen] == '>') {
|
| ctxt->input->cur += tlen + 1;
|
| + ctxt->input->col += tlen + 1;
|
| goto done;
|
| }
|
| ctxt->input->cur += tlen;
|
| + ctxt->input->col += tlen;
|
| name = (xmlChar*)1;
|
| } else {
|
| if (prefix == NULL)
|
| @@ -9302,7 +9795,7 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
|
| /*
|
| * [ WFC: Element Type Match ]
|
| * The Name in an element's end-tag must match the element type in the
|
| - * start-tag.
|
| + * start-tag.
|
| *
|
| */
|
| if (name != (xmlChar*)1) {
|
| @@ -9331,7 +9824,7 @@ done:
|
| /**
|
| * xmlParseCDSect:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * Parse escaped pure raw content.
|
| *
|
| * [18] CDSect ::= CDStart CData CDEnd
|
| @@ -9384,14 +9877,21 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
|
| if (len + 5 >= size) {
|
| xmlChar *tmp;
|
|
|
| - size *= 2;
|
| - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
|
| + if ((size > XML_MAX_TEXT_LENGTH) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
|
| + "CData section too big found", NULL);
|
| + xmlFree (buf);
|
| + return;
|
| + }
|
| + tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
|
| if (tmp == NULL) {
|
| xmlFree(buf);
|
| xmlErrMemory(ctxt, NULL);
|
| return;
|
| }
|
| buf = tmp;
|
| + size *= 2;
|
| }
|
| COPY_BUF(rl,buf,len,r);
|
| r = s;
|
| @@ -9484,7 +9984,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
|
|
|
| /*
|
| * Fifth case : a reference. If if has not been resolved,
|
| - * parsing returns it's Name, create the node
|
| + * parsing returns its Name, create the node
|
| */
|
|
|
| else if (*cur == '&') {
|
| @@ -9525,7 +10025,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
|
| *
|
| * [ WFC: Element Type Match ]
|
| * The Name in an element's end-tag must match the element type in the
|
| - * start-tag.
|
| + * start-tag.
|
| *
|
| */
|
|
|
| @@ -9535,7 +10035,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
|
| const xmlChar *prefix = NULL;
|
| const xmlChar *URI = NULL;
|
| xmlParserNodeInfo node_info;
|
| - int line, tlen;
|
| + int line, tlen = 0;
|
| xmlNodePtr ret;
|
| int nsNr = ctxt->nsNr;
|
|
|
| @@ -9584,7 +10084,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
|
| /*
|
| * [ VC: Root Element Type ]
|
| * The Name in the document type declaration must match the element
|
| - * type of the root element.
|
| + * type of the root element.
|
| */
|
| if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
|
| ctxt->node && (ctxt->node == ctxt->myDoc->children))
|
| @@ -9867,7 +10367,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) {
|
| /**
|
| * xmlParseEncodingDecl:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse the XML encoding declaration
|
| *
|
| * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
|
| @@ -9908,6 +10408,15 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
|
| } else {
|
| xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
|
| }
|
| +
|
| + /*
|
| + * Non-standard parsing, allowing the user to ignore encoding
|
| + */
|
| + if (ctxt->options & XML_PARSE_IGNORE_ENC) {
|
| + xmlFree((xmlChar *) encoding);
|
| + return(NULL);
|
| + }
|
| +
|
| /*
|
| * UTF-16 encoding stwich has already taken place at this stage,
|
| * more over the little-endian/big-endian selection is already done
|
| @@ -9917,7 +10426,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
|
| (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
|
| /*
|
| * If no encoding was passed to the parser, that we are
|
| - * using UTF-16 and no decoder is present i.e. the
|
| + * using UTF-16 and no decoder is present i.e. the
|
| * document is apparently UTF-8 compatible, then raise an
|
| * encoding mismatch fatal error
|
| */
|
| @@ -9968,7 +10477,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
|
| * parse the XML standalone declaration
|
| *
|
| * [32] SDDecl ::= S 'standalone' Eq
|
| - * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
|
| + * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
|
| *
|
| * [ VC: Standalone Document Declaration ]
|
| * TODO The standalone document declaration must have the value "no"
|
| @@ -10048,7 +10557,7 @@ xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
|
| /**
|
| * xmlParseXMLDecl:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse an XML declaration header
|
| *
|
| * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
| @@ -10162,7 +10671,7 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
|
| /**
|
| * xmlParseMisc:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse an XML Misc* optional field.
|
| *
|
| * [27] Misc ::= Comment | PI | S
|
| @@ -10186,7 +10695,7 @@ xmlParseMisc(xmlParserCtxtPtr ctxt) {
|
| /**
|
| * xmlParseDocument:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse an XML document (and build a tree if using the standard SAX
|
| * interface).
|
| *
|
| @@ -10223,9 +10732,9 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| return(-1);
|
|
|
| - if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
|
| + if ((ctxt->encoding == NULL) &&
|
| ((ctxt->input->end - ctxt->input->cur) >= 4)) {
|
| - /*
|
| + /*
|
| * Get the 4 first bytes and decode the charset
|
| * if enc != XML_CHAR_ENCODING_NONE
|
| * plug some encoding conversion routines.
|
| @@ -10275,6 +10784,10 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
| ctxt->sax->startDocument(ctxt->userData);
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| return(-1);
|
| + if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
|
| + (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
|
| + ctxt->myDoc->compression = ctxt->input->buf->compressed;
|
| + }
|
|
|
| /*
|
| * The Misc part of the Prolog
|
| @@ -10374,7 +10887,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
| /**
|
| * xmlParseExtParsedEnt:
|
| * @ctxt: an XML parser context
|
| - *
|
| + *
|
| * parse a general parsed entity
|
| * An external general parsed entity is well-formed if it matches the
|
| * production labeled extParsedEnt.
|
| @@ -10405,7 +10918,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
|
| if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
|
| ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
|
|
|
| - /*
|
| + /*
|
| * Get the 4 first bytes and decode the charset
|
| * if enc != XML_CHAR_ENCODING_NONE
|
| * plug some encoding conversion routines.
|
| @@ -10462,7 +10975,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
|
| xmlParseContent(ctxt);
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| return(-1);
|
| -
|
| +
|
| if ((RAW == '<') && (NXT(1) == '/')) {
|
| xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
|
| } else if (RAW != 0) {
|
| @@ -10482,7 +10995,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
|
| #ifdef LIBXML_PUSH_ENABLED
|
| /************************************************************************
|
| * *
|
| - * Progressive parsing interfaces *
|
| + * Progressive parsing interfaces *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -10519,8 +11032,8 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
|
| buf = in->base;
|
| len = in->length;
|
| } else {
|
| - buf = in->buf->buffer->content;
|
| - len = in->buf->buffer->use;
|
| + buf = xmlBufContent(in->buf->buffer);
|
| + len = xmlBufUse(in->buf->buffer);
|
| }
|
| /* take into account the sequence length */
|
| if (third) len -= 2;
|
| @@ -10543,7 +11056,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: lookup '%c%c' found at %d\n",
|
| first, next, base);
|
| - else
|
| + else
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: lookup '%c%c%c' found at %d\n",
|
| first, next, third, base);
|
| @@ -10559,7 +11072,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
|
| else if (third == 0)
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: lookup '%c%c' failed\n", first, next);
|
| - else
|
| + else
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: lookup '%c%c%c' failed\n", first, next, third);
|
| #endif
|
| @@ -10641,7 +11154,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) {
|
|
|
| if ((utf == NULL) || (len <= 0))
|
| return(0);
|
| -
|
| +
|
| for (ix = 0; ix < len;) { /* string is 0-terminated */
|
| c = utf[ix];
|
| if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
|
| @@ -10773,7 +11286,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
|
| return(0);
|
|
|
| -
|
| +
|
| /*
|
| * Pop-up of finished entities.
|
| */
|
| @@ -10788,22 +11301,22 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| /*
|
| * If we are operating on converted input, try to flush
|
| * remainng chars to avoid them stalling in the non-converted
|
| - * buffer.
|
| + * buffer. But do not do this in document start where
|
| + * encoding="..." may not have been read and we work on a
|
| + * guessed encoding.
|
| */
|
| - if ((ctxt->input->buf->raw != NULL) &&
|
| - (ctxt->input->buf->raw->use > 0)) {
|
| - int base = ctxt->input->base -
|
| - ctxt->input->buf->buffer->content;
|
| - int current = ctxt->input->cur - ctxt->input->base;
|
| + if ((ctxt->instate != XML_PARSER_START) &&
|
| + (ctxt->input->buf->raw != NULL) &&
|
| + (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
|
| + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
|
| + ctxt->input);
|
| + size_t current = ctxt->input->cur - ctxt->input->base;
|
|
|
| xmlParserInputBufferPush(ctxt->input->buf, 0, "");
|
| - ctxt->input->base = ctxt->input->buf->buffer->content + base;
|
| - ctxt->input->cur = ctxt->input->base + current;
|
| - ctxt->input->end =
|
| - &ctxt->input->buf->buffer->content[
|
| - ctxt->input->buf->buffer->use];
|
| + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
|
| + base, current);
|
| }
|
| - avail = ctxt->input->buf->buffer->use -
|
| + avail = xmlBufUse(ctxt->input->buf->buffer) -
|
| (ctxt->input->cur - ctxt->input->base);
|
| }
|
| if (avail < 1)
|
| @@ -10825,7 +11338,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (avail < 4)
|
| goto done;
|
|
|
| - /*
|
| + /*
|
| * Get the 4 first bytes and decode the charset
|
| * if enc != XML_CHAR_ENCODING_NONE
|
| * plug some encoding conversion routines,
|
| @@ -10980,7 +11493,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| /*
|
| * [ VC: Root Element Type ]
|
| * The Name in the document type declaration must match
|
| - * the element type of the root element.
|
| + * the element type of the root element.
|
| */
|
| if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
|
| ctxt->node && (ctxt->node == ctxt->myDoc->children))
|
| @@ -11017,6 +11530,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| } else {
|
| ctxt->instate = XML_PARSER_CONTENT;
|
| }
|
| + ctxt->progressive = 1;
|
| break;
|
| }
|
| if (RAW == '>') {
|
| @@ -11036,6 +11550,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| #endif /* LIBXML_SAX1_ENABLED */
|
|
|
| ctxt->instate = XML_PARSER_CONTENT;
|
| + ctxt->progressive = 1;
|
| break;
|
| }
|
| case XML_PARSER_CONTENT: {
|
| @@ -11053,9 +11568,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| break;
|
| } else if ((cur == '<') && (next == '?')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
|
| + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
|
| + ctxt->progressive = XML_PARSER_PI;
|
| goto done;
|
| + }
|
| xmlParsePI(ctxt);
|
| + ctxt->instate = XML_PARSER_CONTENT;
|
| + ctxt->progressive = 1;
|
| } else if ((cur == '<') && (next != '!')) {
|
| ctxt->instate = XML_PARSER_START_TAG;
|
| break;
|
| @@ -11069,10 +11588,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| ctxt->input->cur += 4;
|
| term = xmlParseLookupSequence(ctxt, '-', '-', '>');
|
| ctxt->input->cur -= 4;
|
| - if ((!terminate) && (term < 0))
|
| + if ((!terminate) && (term < 0)) {
|
| + ctxt->progressive = XML_PARSER_COMMENT;
|
| goto done;
|
| + }
|
| xmlParseComment(ctxt);
|
| ctxt->instate = XML_PARSER_CONTENT;
|
| + ctxt->progressive = 1;
|
| } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
|
| (ctxt->input->cur[2] == '[') &&
|
| (ctxt->input->cur[3] == 'C') &&
|
| @@ -11167,7 +11689,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| break;
|
| case XML_PARSER_CDATA_SECTION: {
|
| /*
|
| - * The Push mode need to have the SAX callback for
|
| + * The Push mode need to have the SAX callback for
|
| * cdataBlock merge back contiguous callbacks.
|
| */
|
| int base;
|
| @@ -11177,7 +11699,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
|
| int tmp;
|
|
|
| - tmp = xmlCheckCdataPush(ctxt->input->cur,
|
| + tmp = xmlCheckCdataPush(ctxt->input->cur,
|
| XML_PARSER_BIG_BUFFER_SIZE);
|
| if (tmp < 0) {
|
| tmp = -tmp;
|
| @@ -11247,7 +11769,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| avail = ctxt->input->length -
|
| (ctxt->input->cur - ctxt->input->base);
|
| else
|
| - avail = ctxt->input->buf->buffer->use -
|
| + avail = xmlBufUse(ctxt->input->buf->buffer) -
|
| (ctxt->input->cur - ctxt->input->base);
|
| if (avail < 2)
|
| goto done;
|
| @@ -11255,8 +11777,10 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| next = ctxt->input->cur[1];
|
| if ((cur == '<') && (next == '?')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
|
| + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
|
| + ctxt->progressive = XML_PARSER_PI;
|
| goto done;
|
| + }
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: Parsing PI\n");
|
| @@ -11264,13 +11788,17 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| xmlParsePI(ctxt);
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| + ctxt->instate = XML_PARSER_MISC;
|
| + ctxt->progressive = 1;
|
| ctxt->checkIndex = 0;
|
| } else if ((cur == '<') && (next == '!') &&
|
| (ctxt->input->cur[2] == '-') &&
|
| (ctxt->input->cur[3] == '-')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
|
| + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
|
| + ctxt->progressive = XML_PARSER_COMMENT;
|
| goto done;
|
| + }
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: Parsing Comment\n");
|
| @@ -11279,6 +11807,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| ctxt->instate = XML_PARSER_MISC;
|
| + ctxt->progressive = 1;
|
| ctxt->checkIndex = 0;
|
| } else if ((cur == '<') && (next == '!') &&
|
| (ctxt->input->cur[2] == 'D') &&
|
| @@ -11289,13 +11818,17 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| (ctxt->input->cur[7] == 'P') &&
|
| (ctxt->input->cur[8] == 'E')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
| + (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
|
| + ctxt->progressive = XML_PARSER_DTD;
|
| goto done;
|
| + }
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: Parsing internal subset\n");
|
| #endif
|
| ctxt->inSubset = 1;
|
| + ctxt->progressive = 0;
|
| + ctxt->checkIndex = 0;
|
| xmlParseDocTypeDecl(ctxt);
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| @@ -11328,7 +11861,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| goto done;
|
| } else {
|
| ctxt->instate = XML_PARSER_START_TAG;
|
| - ctxt->progressive = 1;
|
| + ctxt->progressive = XML_PARSER_START_TAG;
|
| xmlParseGetLasts(ctxt, &lastlt, &lastgt);
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| @@ -11341,15 +11874,18 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (ctxt->input->buf == NULL)
|
| avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
|
| else
|
| - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
|
| - if (avail < 2)
|
| + avail = xmlBufUse(ctxt->input->buf->buffer) -
|
| + (ctxt->input->cur - ctxt->input->base);
|
| + if (avail < 2)
|
| goto done;
|
| cur = ctxt->input->cur[0];
|
| next = ctxt->input->cur[1];
|
| if ((cur == '<') && (next == '?')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
|
| + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
|
| + ctxt->progressive = XML_PARSER_PI;
|
| goto done;
|
| + }
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: Parsing PI\n");
|
| @@ -11357,11 +11893,15 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| xmlParsePI(ctxt);
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| + ctxt->instate = XML_PARSER_PROLOG;
|
| + ctxt->progressive = 1;
|
| } else if ((cur == '<') && (next == '!') &&
|
| (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
|
| + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
|
| + ctxt->progressive = XML_PARSER_COMMENT;
|
| goto done;
|
| + }
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: Parsing Comment\n");
|
| @@ -11370,13 +11910,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| ctxt->instate = XML_PARSER_PROLOG;
|
| + ctxt->progressive = 1;
|
| } else if ((cur == '<') && (next == '!') &&
|
| (avail < 4)) {
|
| goto done;
|
| } else {
|
| ctxt->instate = XML_PARSER_START_TAG;
|
| if (ctxt->progressive == 0)
|
| - ctxt->progressive = 1;
|
| + ctxt->progressive = XML_PARSER_START_TAG;
|
| xmlParseGetLasts(ctxt, &lastlt, &lastgt);
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| @@ -11389,15 +11930,18 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (ctxt->input->buf == NULL)
|
| avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
|
| else
|
| - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
|
| + avail = xmlBufUse(ctxt->input->buf->buffer) -
|
| + (ctxt->input->cur - ctxt->input->base);
|
| if (avail < 2)
|
| goto done;
|
| cur = ctxt->input->cur[0];
|
| next = ctxt->input->cur[1];
|
| if ((cur == '<') && (next == '?')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
|
| + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
|
| + ctxt->progressive = XML_PARSER_PI;
|
| goto done;
|
| + }
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: Parsing PI\n");
|
| @@ -11406,11 +11950,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| ctxt->instate = XML_PARSER_EPILOG;
|
| + ctxt->progressive = 1;
|
| } else if ((cur == '<') && (next == '!') &&
|
| (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
|
| if ((!terminate) &&
|
| - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
|
| + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
|
| + ctxt->progressive = XML_PARSER_COMMENT;
|
| goto done;
|
| + }
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext,
|
| "PP: Parsing Comment\n");
|
| @@ -11419,6 +11966,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| ctxt->instate = XML_PARSER_EPILOG;
|
| + ctxt->progressive = 1;
|
| } else if ((cur == '<') && (next == '!') &&
|
| (avail < 4)) {
|
| goto done;
|
| @@ -11448,29 +11996,28 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| int base, i;
|
| xmlChar *buf;
|
| xmlChar quote = 0;
|
| + size_t use;
|
|
|
| base = ctxt->input->cur - ctxt->input->base;
|
| if (base < 0) return(0);
|
| if (ctxt->checkIndex > base)
|
| base = ctxt->checkIndex;
|
| - buf = ctxt->input->buf->buffer->content;
|
| - for (;(unsigned int) base < ctxt->input->buf->buffer->use;
|
| - base++) {
|
| + buf = xmlBufContent(ctxt->input->buf->buffer);
|
| + use = xmlBufUse(ctxt->input->buf->buffer);
|
| + for (;(unsigned int) base < use; base++) {
|
| if (quote != 0) {
|
| if (buf[base] == quote)
|
| quote = 0;
|
| - continue;
|
| + continue;
|
| }
|
| if ((quote == 0) && (buf[base] == '<')) {
|
| int found = 0;
|
| /* special handling of comments */
|
| - if (((unsigned int) base + 4 <
|
| - ctxt->input->buf->buffer->use) &&
|
| + if (((unsigned int) base + 4 < use) &&
|
| (buf[base + 1] == '!') &&
|
| (buf[base + 2] == '-') &&
|
| (buf[base + 3] == '-')) {
|
| - for (;(unsigned int) base + 3 <
|
| - ctxt->input->buf->buffer->use; base++) {
|
| + for (;(unsigned int) base + 3 < use; base++) {
|
| if ((buf[base] == '-') &&
|
| (buf[base + 1] == '-') &&
|
| (buf[base + 2] == '>')) {
|
| @@ -11501,17 +12048,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| fprintf(stderr, "%c%c%c%c: ", buf[base],
|
| buf[base + 1], buf[base + 2], buf[base + 3]);
|
| #endif
|
| - if ((unsigned int) base +1 >=
|
| - ctxt->input->buf->buffer->use)
|
| + if ((unsigned int) base +1 >= use)
|
| break;
|
| if (buf[base + 1] == ']') {
|
| /* conditional crap, skip both ']' ! */
|
| base++;
|
| continue;
|
| }
|
| - for (i = 1;
|
| - (unsigned int) base + i < ctxt->input->buf->buffer->use;
|
| - i++) {
|
| + for (i = 1; (unsigned int) base + i < use; i++) {
|
| if (buf[base + i] == '>') {
|
| #if 0
|
| fprintf(stderr, "found\n");
|
| @@ -11529,7 +12073,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
| fprintf(stderr, "end of stream\n");
|
| #endif
|
| break;
|
| -
|
| +
|
| }
|
| not_end_of_int_subset:
|
| continue; /* for */
|
| @@ -11537,6 +12081,10 @@ not_end_of_int_subset:
|
| /*
|
| * We didn't found the end of the Internal subset
|
| */
|
| + if (quote == 0)
|
| + ctxt->checkIndex = base;
|
| + else
|
| + ctxt->checkIndex = 0;
|
| #ifdef DEBUG_PUSH
|
| if (next == 0)
|
| xmlGenericError(xmlGenericErrorContext,
|
| @@ -11545,6 +12093,7 @@ not_end_of_int_subset:
|
| goto done;
|
|
|
| found_end_int_subset:
|
| + ctxt->checkIndex = 0;
|
| xmlParseInternalSubset(ctxt);
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| goto done;
|
| @@ -11639,7 +12188,7 @@ found_end_int_subset:
|
| break;
|
| }
|
| }
|
| -done:
|
| +done:
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
|
| #endif
|
| @@ -11659,6 +12208,55 @@ encoding_error:
|
| }
|
|
|
| /**
|
| + * xmlParseCheckTransition:
|
| + * @ctxt: an XML parser context
|
| + * @chunk: a char array
|
| + * @size: the size in bytes of the chunk
|
| + *
|
| + * Check, depending on the current parser state, whether the given chunk must
|
| + * be processed immediately or whether more data is needed to advance parsing.
|
| + *
|
| + * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
|
| + */
|
| +static int
|
| +xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
|
| + if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
|
| + return(-1);
|
| + if (ctxt->instate == XML_PARSER_START_TAG) { /* here and below: needed only if the chunk holds a '>' */
|
| + if (memchr(chunk, '>', size) != NULL)
|
| + return(1);
|
| + return(0);
|
| + }
|
| + if (ctxt->progressive == XML_PARSER_COMMENT) {
|
| + if (memchr(chunk, '>', size) != NULL)
|
| + return(1);
|
| + return(0);
|
| + }
|
| + if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
|
| + if (memchr(chunk, '>', size) != NULL)
|
| + return(1);
|
| + return(0);
|
| + }
|
| + if (ctxt->progressive == XML_PARSER_PI) {
|
| + if (memchr(chunk, '>', size) != NULL)
|
| + return(1);
|
| + return(0);
|
| + }
|
| + if (ctxt->instate == XML_PARSER_END_TAG) {
|
| + if (memchr(chunk, '>', size) != NULL)
|
| + return(1);
|
| + return(0);
|
| + }
|
| + if ((ctxt->progressive == XML_PARSER_DTD) ||
|
| + (ctxt->instate == XML_PARSER_DTD)) {
|
| + if (memchr(chunk, '>', size) != NULL)
|
| + return(1);
|
| + return(0);
|
| + }
|
| + return(1); /* any other state: the push is always needed */
|
| +}
|
| +
|
| +/**
|
| * xmlParseChunk:
|
| * @ctxt: an XML parser context
|
| * @chunk: an char array
|
| @@ -11674,6 +12272,8 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
|
| int terminate) {
|
| int end_in_lf = 0;
|
| int remain = 0;
|
| + size_t old_avail = 0;
|
| + size_t avail = 0;
|
|
|
| if (ctxt == NULL)
|
| return(XML_ERR_INTERNAL_ERROR);
|
| @@ -11693,10 +12293,11 @@ xmldecl_done:
|
|
|
| if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
| (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
|
| - int base = ctxt->input->base - ctxt->input->buf->buffer->content;
|
| - int cur = ctxt->input->cur - ctxt->input->base;
|
| + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
|
| + size_t cur = ctxt->input->cur - ctxt->input->base;
|
| int res;
|
|
|
| + old_avail = xmlBufUse(ctxt->input->buf->buffer);
|
| /*
|
| * Specific handling if we autodetected an encoding, we should not
|
| * push more than the first line ... which depend on the encoding
|
| @@ -11725,23 +12326,20 @@ xmldecl_done:
|
| * if size is greater than len. Otherwise, memmove in xmlBufferAdd
|
| * will blindly copy extra bytes from memory.
|
| */
|
| - if (size > len) {
|
| + if ((unsigned int) size > len) {
|
| remain = size - len;
|
| size = len;
|
| } else {
|
| remain = 0;
|
| }
|
| }
|
| - res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
| + res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
| if (res < 0) {
|
| ctxt->errNo = XML_PARSER_EOF;
|
| ctxt->disableSAX = 1;
|
| return (XML_PARSER_EOF);
|
| }
|
| - ctxt->input->base = ctxt->input->buf->buffer->content + base;
|
| - ctxt->input->cur = ctxt->input->base + cur;
|
| - ctxt->input->end =
|
| - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
|
| + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
|
| #endif
|
| @@ -11752,23 +12350,48 @@ xmldecl_done:
|
| if ((in->encoder != NULL) && (in->buffer != NULL) &&
|
| (in->raw != NULL)) {
|
| int nbchars;
|
| + size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
|
| + size_t current = ctxt->input->cur - ctxt->input->base;
|
|
|
| - nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
|
| + nbchars = xmlCharEncInput(in, terminate);
|
| if (nbchars < 0) {
|
| /* TODO 2.6.0 */
|
| xmlGenericError(xmlGenericErrorContext,
|
| "xmlParseChunk: encoder error\n");
|
| return(XML_ERR_INVALID_ENCODING);
|
| }
|
| + xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
|
| }
|
| }
|
| }
|
| - if (remain != 0)
|
| + if (remain != 0) {
|
| xmlParseTryOrFinish(ctxt, 0);
|
| - else
|
| - xmlParseTryOrFinish(ctxt, terminate);
|
| + } else {
|
| + if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
|
| + avail = xmlBufUse(ctxt->input->buf->buffer);
|
| + /*
|
| + * Depending on the current state, it may not be worth trying
|
| + * to parse yet: if nothing in the new chunk could trigger a
|
| + * parser state transition, skip the attempt and wait for
|
| + * more data.
|
| + */
|
| + if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
|
| + (old_avail == 0) || (avail == 0) ||
|
| + (xmlParseCheckTransition(ctxt,
|
| + (const char *)&ctxt->input->base[old_avail],
|
| + avail - old_avail)))
|
| + xmlParseTryOrFinish(ctxt, terminate);
|
| + }
|
| if (ctxt->instate == XML_PARSER_EOF)
|
| return(ctxt->errNo);
|
| +
|
| + if ((ctxt->input != NULL) &&
|
| + (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
|
| + ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
|
| + ((ctxt->options & XML_PARSE_HUGE) == 0)) {
|
| + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
|
| + ctxt->instate = XML_PARSER_EOF;
|
| + }
|
| if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
|
| return(ctxt->errNo);
|
|
|
| @@ -11780,28 +12403,35 @@ xmldecl_done:
|
| }
|
| if ((end_in_lf == 1) && (ctxt->input != NULL) &&
|
| (ctxt->input->buf != NULL)) {
|
| + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
|
| + ctxt->input);
|
| + size_t current = ctxt->input->cur - ctxt->input->base;
|
| +
|
| xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
|
| +
|
| + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
|
| + base, current);
|
| }
|
| if (terminate) {
|
| /*
|
| * Check for termination
|
| */
|
| - int avail = 0;
|
| + int cur_avail = 0;
|
|
|
| if (ctxt->input != NULL) {
|
| if (ctxt->input->buf == NULL)
|
| - avail = ctxt->input->length -
|
| - (ctxt->input->cur - ctxt->input->base);
|
| + cur_avail = ctxt->input->length -
|
| + (ctxt->input->cur - ctxt->input->base);
|
| else
|
| - avail = ctxt->input->buf->buffer->use -
|
| - (ctxt->input->cur - ctxt->input->base);
|
| + cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
|
| + (ctxt->input->cur - ctxt->input->base);
|
| }
|
| -
|
| +
|
| if ((ctxt->instate != XML_PARSER_EOF) &&
|
| (ctxt->instate != XML_PARSER_EPILOG)) {
|
| xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
|
| - }
|
| - if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
|
| + }
|
| + if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
|
| xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
|
| }
|
| if (ctxt->instate != XML_PARSER_EOF) {
|
| @@ -11810,12 +12440,15 @@ xmldecl_done:
|
| }
|
| ctxt->instate = XML_PARSER_EOF;
|
| }
|
| - return((xmlParserErrors) ctxt->errNo);
|
| + if (ctxt->wellFormed == 0)
|
| + return((xmlParserErrors) ctxt->errNo);
|
| + else
|
| + return(0);
|
| }
|
|
|
| /************************************************************************
|
| * *
|
| - * I/O front end functions to the parser *
|
| + * I/O front end functions to the parser *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -11839,7 +12472,7 @@ xmldecl_done:
|
| */
|
|
|
| xmlParserCtxtPtr
|
| -xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| +xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| const char *chunk, int size, const char *filename) {
|
| xmlParserCtxtPtr ctxt;
|
| xmlParserInputPtr inputStream;
|
| @@ -11888,7 +12521,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
|
| if (user_data != NULL)
|
| ctxt->userData = user_data;
|
| - }
|
| + }
|
| if (filename == NULL) {
|
| ctxt->directory = NULL;
|
| } else {
|
| @@ -11914,11 +12547,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| }
|
| }
|
| inputStream->buf = buf;
|
| - inputStream->base = inputStream->buf->buffer->content;
|
| - inputStream->cur = inputStream->buf->buffer->content;
|
| - inputStream->end =
|
| - &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
|
| -
|
| + xmlBufResetInput(inputStream->buf->buffer, inputStream);
|
| inputPush(ctxt, inputStream);
|
|
|
| /*
|
| @@ -11929,15 +12558,12 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| if ((size == 0) || (chunk == NULL)) {
|
| ctxt->charset = XML_CHAR_ENCODING_NONE;
|
| } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
|
| - int base = ctxt->input->base - ctxt->input->buf->buffer->content;
|
| - int cur = ctxt->input->cur - ctxt->input->base;
|
| + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
|
| + size_t cur = ctxt->input->cur - ctxt->input->base;
|
|
|
| - xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
| + xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
|
|
| - ctxt->input->base = ctxt->input->buf->buffer->content + base;
|
| - ctxt->input->cur = ctxt->input->base + cur;
|
| - ctxt->input->end =
|
| - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
|
| + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
|
| #endif
|
| @@ -11957,7 +12583,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| *
|
| * Blocks further parser processing
|
| */
|
| -void
|
| +void
|
| xmlStopParser(xmlParserCtxtPtr ctxt) {
|
| if (ctxt == NULL)
|
| return;
|
| @@ -11991,11 +12617,15 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| xmlParserCtxtPtr ctxt;
|
| xmlParserInputPtr inputStream;
|
| xmlParserInputBufferPtr buf;
|
| -
|
| +
|
| if (ioread == NULL) return(NULL);
|
|
|
| buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
|
| - if (buf == NULL) return(NULL);
|
| + if (buf == NULL) {
|
| + if (ioclose != NULL)
|
| + ioclose(ioctx);
|
| + return (NULL);
|
| + }
|
|
|
| ctxt = xmlNewParserCtxt();
|
| if (ctxt == NULL) {
|
| @@ -12020,7 +12650,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
|
| if (user_data != NULL)
|
| ctxt->userData = user_data;
|
| - }
|
| + }
|
|
|
| inputStream = xmlNewIOInputStream(ctxt, buf, enc);
|
| if (inputStream == NULL) {
|
| @@ -12035,7 +12665,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| #ifdef LIBXML_VALID_ENABLED
|
| /************************************************************************
|
| * *
|
| - * Front ends when parsing a DTD *
|
| + * Front ends when parsing a DTD *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -12046,7 +12676,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
| * @enc: the charset encoding if known
|
| *
|
| * Load and parse a DTD
|
| - *
|
| + *
|
| * Returns the resulting xmlDtdPtr or NULL in case of error.
|
| * @input will be freed by the function in any case.
|
| */
|
| @@ -12068,10 +12698,13 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
| return(NULL);
|
| }
|
|
|
| + /* We are loading a DTD */
|
| + ctxt->options |= XML_PARSE_DTDLOAD;
|
| +
|
| /*
|
| * Set-up the SAX context
|
| */
|
| - if (sax != NULL) {
|
| + if (sax != NULL) {
|
| if (ctxt->sax != NULL)
|
| xmlFree(ctxt->sax);
|
| ctxt->sax = sax;
|
| @@ -12125,7 +12758,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
|
|
| if ((enc == XML_CHAR_ENCODING_NONE) &&
|
| ((ctxt->input->end - ctxt->input->cur) >= 4)) {
|
| - /*
|
| + /*
|
| * Get the 4 first bytes and decode the charset
|
| * if enc != XML_CHAR_ENCODING_NONE
|
| * plug some encoding conversion routines.
|
| @@ -12164,7 +12797,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
| }
|
| if (sax != NULL) ctxt->sax = NULL;
|
| xmlFreeParserCtxt(ctxt);
|
| -
|
| +
|
| return(ret);
|
| }
|
|
|
| @@ -12175,7 +12808,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
| * @SystemID: a NAME* containing the URL to the DTD
|
| *
|
| * Load and parse an external subset.
|
| - *
|
| + *
|
| * Returns the resulting xmlDtdPtr or NULL in case of error.
|
| */
|
|
|
| @@ -12195,16 +12828,19 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
|
| return(NULL);
|
| }
|
|
|
| + /* We are loading a DTD */
|
| + ctxt->options |= XML_PARSE_DTDLOAD;
|
| +
|
| /*
|
| * Set-up the SAX context
|
| */
|
| - if (sax != NULL) {
|
| + if (sax != NULL) {
|
| if (ctxt->sax != NULL)
|
| xmlFree(ctxt->sax);
|
| ctxt->sax = sax;
|
| ctxt->userData = ctxt;
|
| }
|
| -
|
| +
|
| /*
|
| * Canonicalise the system ID
|
| */
|
| @@ -12315,7 +12951,7 @@ xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
|
|
|
| /************************************************************************
|
| * *
|
| - * Front ends when parsing an Entity *
|
| + * Front ends when parsing an Entity *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -12431,12 +13067,22 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
|
| */
|
| if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
|
| (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
|
| - xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
|
| + xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
|
| "Version mismatch between document and entity\n");
|
| }
|
| }
|
|
|
| /*
|
| + * If the user provided their own SAX callbacks then reuse the
|
| + * userData callback field, otherwise the expected setup in a
|
| + * DOM builder is to have userData == ctxt
|
| + */
|
| + if (ctx->userData == ctx)
|
| + ctxt->userData = ctxt;
|
| + else
|
| + ctxt->userData = ctx->userData;
|
| +
|
| + /*
|
| * Doing validity checking on chunk doesn't make sense
|
| */
|
| ctxt->instate = XML_PARSER_CONTENT;
|
| @@ -12704,7 +13350,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
|
| if (ctxt->lastError.code != XML_ERR_OK)
|
| xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
|
|
|
| - if (sax != NULL)
|
| + if (sax != NULL)
|
| ctxt->sax = oldsax;
|
| oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
|
| oldctxt->node_seq.length = ctxt->node_seq.length;
|
| @@ -13074,7 +13720,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
|
| xmlFree((xmlChar *) ctxt->encoding);
|
| ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
|
|
|
| - hdlr = xmlFindCharEncodingHandler(doc->encoding);
|
| + hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
|
| if (hdlr != NULL) {
|
| xmlSwitchToEncoding(ctxt, hdlr);
|
| } else {
|
| @@ -13085,6 +13731,8 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
|
| xmlCtxtUseOptionsInternal(ctxt, options, NULL);
|
| xmlDetectSAX2(ctxt);
|
| ctxt->myDoc = doc;
|
| + /* parsing in context, i.e. as within existing content */
|
| + ctxt->instate = XML_PARSER_CONTENT;
|
|
|
| fake = xmlNewComment(NULL);
|
| if (fake == NULL) {
|
| @@ -13120,7 +13768,6 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
|
| }
|
| cur = cur->parent;
|
| }
|
| - ctxt->instate = XML_PARSER_CONTENT;
|
| }
|
|
|
| if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
|
| @@ -13523,7 +14170,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
|
| * @filename: the filename or URL
|
| * @options: a combination of xmlParserOption
|
| *
|
| - * Create a parser context for a file or URL content.
|
| + * Create a parser context for a file or URL content.
|
| * Automatic support for ZLIB/Compress compressed document is provided
|
| * by default if found at compile-time and for file accesses
|
| *
|
| @@ -13565,7 +14212,7 @@ xmlCreateURLParserCtxt(const char *filename, int options)
|
| * xmlCreateFileParserCtxt:
|
| * @filename: the filename
|
| *
|
| - * Create a parser context for a file content.
|
| + * Create a parser context for a file content.
|
| * Automatic support for ZLIB/Compress compressed document is provided
|
| * by default if found at compile-time.
|
| *
|
| @@ -13643,7 +14290,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
|
| if (sax != NULL)
|
| ctxt->sax = NULL;
|
| xmlFreeParserCtxt(ctxt);
|
| -
|
| +
|
| return(ret);
|
| }
|
|
|
| @@ -13743,7 +14390,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
|
| xmlClearParserCtxt(ctxt);
|
| return;
|
| }
|
| -
|
| +
|
| xmlClearParserCtxt(ctxt);
|
| if (filename != NULL)
|
| input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
|
| @@ -13761,7 +14408,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
|
| *
|
| * parse an XML file and call the given SAX handler routines.
|
| * Automatic support for ZLIB/Compress compressed document is provided
|
| - *
|
| + *
|
| * Returns 0 in case of success or a error number otherwise
|
| */
|
| int
|
| @@ -13769,7 +14416,7 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
|
| const char *filename) {
|
| int ret = 0;
|
| xmlParserCtxtPtr ctxt;
|
| -
|
| +
|
| ctxt = xmlCreateFileParserCtxt(filename);
|
| if (ctxt == NULL) return -1;
|
| if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
|
| @@ -13779,9 +14426,9 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
|
|
|
| if (user_data != NULL)
|
| ctxt->userData = user_data;
|
| -
|
| +
|
| xmlParseDocument(ctxt);
|
| -
|
| +
|
| if (ctxt->wellFormed)
|
| ret = 0;
|
| else {
|
| @@ -13797,14 +14444,14 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
|
| ctxt->myDoc = NULL;
|
| }
|
| xmlFreeParserCtxt(ctxt);
|
| -
|
| +
|
| return ret;
|
| }
|
| #endif /* LIBXML_SAX1_ENABLED */
|
|
|
| /************************************************************************
|
| * *
|
| - * Front ends when parsing from memory *
|
| + * Front ends when parsing from memory *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -13848,9 +14495,7 @@ xmlCreateMemoryParserCtxt(const char *buffer, int size) {
|
|
|
| input->filename = NULL;
|
| input->buf = buf;
|
| - input->base = input->buf->buffer->content;
|
| - input->cur = input->buf->buffer->content;
|
| - input->end = &input->buf->buffer->content[input->buf->buffer->use];
|
| + xmlBufResetInput(input->buf->buffer, input);
|
|
|
| inputPush(ctxt, input);
|
| return(ctxt);
|
| @@ -13906,7 +14551,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
|
| xmlFreeDoc(ctxt->myDoc);
|
| ctxt->myDoc = NULL;
|
| }
|
| - if (sax != NULL)
|
| + if (sax != NULL)
|
| ctxt->sax = NULL;
|
| xmlFreeParserCtxt(ctxt);
|
|
|
| @@ -13924,7 +14569,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
|
| * parse an XML in-memory block and use the given SAX function block
|
| * to handle the parsing callback. If sax is NULL, fallback to the default
|
| * DOM tree building routines.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -13939,7 +14584,7 @@ xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
|
| * @size: the size of the array
|
| *
|
| * parse an XML in-memory block and build a tree.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
|
|
| @@ -13993,7 +14638,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
|
| ctxt->userData = user_data;
|
|
|
| xmlParseDocument(ctxt);
|
| -
|
| +
|
| if (ctxt->wellFormed)
|
| ret = 0;
|
| else {
|
| @@ -14009,7 +14654,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
|
| ctxt->myDoc = NULL;
|
| }
|
| xmlFreeParserCtxt(ctxt);
|
| -
|
| +
|
| return ret;
|
| }
|
| #endif /* LIBXML_SAX1_ENABLED */
|
| @@ -14043,7 +14688,7 @@ xmlCreateDocParserCtxt(const xmlChar *cur) {
|
| * parse an XML in-memory document and build a tree.
|
| * It use the given SAX function block to handle the parsing callback.
|
| * If sax is NULL, fallback to the default DOM tree building routines.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
|
|
| @@ -14058,7 +14703,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
|
|
|
| ctxt = xmlCreateDocParserCtxt(cur);
|
| if (ctxt == NULL) return(NULL);
|
| - if (sax != NULL) {
|
| + if (sax != NULL) {
|
| oldsax = ctxt->sax;
|
| ctxt->sax = sax;
|
| ctxt->userData = NULL;
|
| @@ -14075,7 +14720,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
|
| if (sax != NULL)
|
| ctxt->sax = oldsax;
|
| xmlFreeParserCtxt(ctxt);
|
| -
|
| +
|
| return(ret);
|
| }
|
|
|
| @@ -14084,7 +14729,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
|
| * @cur: a pointer to an array of xmlChar
|
| *
|
| * parse an XML in-memory document and build a tree.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
|
|
| @@ -14097,8 +14742,8 @@ xmlParseDoc(const xmlChar *cur) {
|
| #ifdef LIBXML_LEGACY_ENABLED
|
| /************************************************************************
|
| * *
|
| - * Specific function to keep track of entities references *
|
| - * and used by the XSLT debugger *
|
| + * Specific function to keep track of entities references *
|
| + * and used by the XSLT debugger *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -14108,7 +14753,7 @@ static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
|
| * xmlAddEntityReference:
|
| * @ent : A valid entity
|
| * @firstNode : A valid first node for children of entity
|
| - * @lastNode : A valid last node of children entity
|
| + * @lastNode : A valid last node of children entity
|
| *
|
| * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
|
| */
|
| @@ -14137,7 +14782,7 @@ xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
|
|
|
| /************************************************************************
|
| * *
|
| - * Miscellaneous *
|
| + * Miscellaneous *
|
| * *
|
| ************************************************************************/
|
|
|
| @@ -14165,12 +14810,13 @@ xmlInitParser(void) {
|
| __xmlGlobalInitMutexLock();
|
| if (xmlParserInitialized == 0) {
|
| #endif
|
| - xmlInitGlobals();
|
| xmlInitThreads();
|
| + xmlInitGlobals();
|
| if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
|
| (xmlGenericError == NULL))
|
| initGenericErrorDefaultFunc(NULL);
|
| xmlInitMemory();
|
| + xmlInitializeDict();
|
| xmlInitCharEncodingHandlers();
|
| xmlDefaultSAXHandlerInit();
|
| xmlRegisterDefaultInputCallbacks();
|
| @@ -14184,6 +14830,9 @@ xmlInitParser(void) {
|
| #ifdef LIBXML_XPATH_ENABLED
|
| xmlXPathInit();
|
| #endif
|
| +#ifdef LIBXML_CATALOG_ENABLED
|
| + xmlInitializeCatalog();
|
| +#endif
|
| xmlParserInitialized = 1;
|
| #ifdef LIBXML_THREAD_ENABLED
|
| }
|
| @@ -14231,8 +14880,8 @@ xmlCleanupParser(void) {
|
| xmlSchemaCleanupTypes();
|
| xmlRelaxNGCleanupTypes();
|
| #endif
|
| - xmlCleanupGlobals();
|
| xmlResetLastError();
|
| + xmlCleanupGlobals();
|
| xmlCleanupThreads(); /* must be last if called not from the main thread */
|
| xmlCleanupMemory();
|
| xmlParserInitialized = 0;
|
| @@ -14252,7 +14901,7 @@ xmlCleanupParser(void) {
|
| * current scope
|
| */
|
| #define DICT_FREE(str) \
|
| - if ((str) && ((!dict) || \
|
| + if ((str) && ((!dict) || \
|
| (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
|
| xmlFree((char *)(str));
|
|
|
| @@ -14267,7 +14916,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
|
| {
|
| xmlParserInputPtr input;
|
| xmlDictPtr dict;
|
| -
|
| +
|
| if (ctxt == NULL)
|
| return;
|
|
|
| @@ -14335,6 +14984,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
|
| ctxt->catalogs = NULL;
|
| ctxt->nbentities = 0;
|
| ctxt->sizeentities = 0;
|
| + ctxt->sizeentcopy = 0;
|
| xmlInitNodeInfoSeq(&ctxt->node_seq);
|
|
|
| if (ctxt->attsDefault != NULL) {
|
| @@ -14419,25 +15069,18 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
|
| inputStream->filename = (char *)
|
| xmlCanonicPath((const xmlChar *) filename);
|
| inputStream->buf = buf;
|
| - inputStream->base = inputStream->buf->buffer->content;
|
| - inputStream->cur = inputStream->buf->buffer->content;
|
| - inputStream->end =
|
| - &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
|
| + xmlBufResetInput(buf->buffer, inputStream);
|
|
|
| inputPush(ctxt, inputStream);
|
|
|
| if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
| (ctxt->input->buf != NULL)) {
|
| - int base = ctxt->input->base - ctxt->input->buf->buffer->content;
|
| - int cur = ctxt->input->cur - ctxt->input->base;
|
| + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
|
| + size_t cur = ctxt->input->cur - ctxt->input->base;
|
|
|
| xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
|
|
| - ctxt->input->base = ctxt->input->buf->buffer->content + base;
|
| - ctxt->input->cur = ctxt->input->base + cur;
|
| - ctxt->input->end =
|
| - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
|
| - use];
|
| + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
|
| #ifdef DEBUG_PUSH
|
| xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
|
| #endif
|
| @@ -14588,11 +15231,21 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi
|
| if (options & XML_PARSE_HUGE) {
|
| ctxt->options |= XML_PARSE_HUGE;
|
| options -= XML_PARSE_HUGE;
|
| + if (ctxt->dict != NULL)
|
| + xmlDictSetLimit(ctxt->dict, 0);
|
| }
|
| if (options & XML_PARSE_OLDSAX) {
|
| ctxt->options |= XML_PARSE_OLDSAX;
|
| options -= XML_PARSE_OLDSAX;
|
| }
|
| + if (options & XML_PARSE_IGNORE_ENC) {
|
| + ctxt->options |= XML_PARSE_IGNORE_ENC;
|
| + options -= XML_PARSE_IGNORE_ENC;
|
| + }
|
| + if (options & XML_PARSE_BIG_LINES) {
|
| + ctxt->options |= XML_PARSE_BIG_LINES;
|
| + options -= XML_PARSE_BIG_LINES;
|
| + }
|
| ctxt->linenumbers = 1;
|
| return (options);
|
| }
|
| @@ -14667,7 +15320,7 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
|
| * @options: a combination of xmlParserOption
|
| *
|
| * parse an XML in-memory document and build a tree.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14677,6 +15330,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio
|
|
|
| if (cur == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| ctxt = xmlCreateDocParserCtxt(cur);
|
| if (ctxt == NULL)
|
| @@ -14691,7 +15345,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio
|
| * @options: a combination of xmlParserOption
|
| *
|
| * parse an XML file from the filesystem or the network.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14699,6 +15353,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
|
| {
|
| xmlParserCtxtPtr ctxt;
|
|
|
| + xmlInitParser();
|
| ctxt = xmlCreateURLParserCtxt(filename, options);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| @@ -14714,7 +15369,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
|
| * @options: a combination of xmlParserOption
|
| *
|
| * parse an XML in-memory document and build a tree.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14722,6 +15377,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin
|
| {
|
| xmlParserCtxtPtr ctxt;
|
|
|
| + xmlInitParser();
|
| ctxt = xmlCreateMemoryParserCtxt(buffer, size);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| @@ -14738,7 +15394,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin
|
| * parse an XML from a file descriptor and build a tree.
|
| * NOTE that the file descriptor will not be closed when the
|
| * reader is closed or reset.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14750,6 +15406,7 @@ xmlReadFd(int fd, const char *URL, const char *encoding, int options)
|
|
|
| if (fd < 0)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
| if (input == NULL)
|
| @@ -14780,7 +15437,7 @@ xmlReadFd(int fd, const char *URL, const char *encoding, int options)
|
| * @options: a combination of xmlParserOption
|
| *
|
| * parse an XML document from I/O functions and source and build a tree.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14793,11 +15450,15 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
|
|
|
| if (ioread == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
| XML_CHAR_ENCODING_NONE);
|
| - if (input == NULL)
|
| + if (input == NULL) {
|
| + if (ioclose != NULL)
|
| + ioclose(ioctx);
|
| return (NULL);
|
| + }
|
| ctxt = xmlNewParserCtxt();
|
| if (ctxt == NULL) {
|
| xmlFreeParserInputBuffer(input);
|
| @@ -14823,7 +15484,7 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
|
| *
|
| * parse an XML in-memory document and build a tree.
|
| * This reuses the existing @ctxt parser context
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14836,6 +15497,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| xmlCtxtReset(ctxt);
|
|
|
| @@ -14856,7 +15518,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
|
| *
|
| * parse an XML file from the filesystem or the network.
|
| * This reuses the existing @ctxt parser context
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14869,6 +15531,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| xmlCtxtReset(ctxt);
|
|
|
| @@ -14891,7 +15554,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
|
| *
|
| * parse an XML in-memory document and build a tree.
|
| * This reuses the existing @ctxt parser context
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14905,6 +15568,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
|
| return (NULL);
|
| if (buffer == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| xmlCtxtReset(ctxt);
|
|
|
| @@ -14935,7 +15599,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
|
| * This reuses the existing @ctxt parser context
|
| * NOTE that the file descriptor will not be closed when the
|
| * reader is closed or reset.
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14949,6 +15613,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| xmlCtxtReset(ctxt);
|
|
|
| @@ -14978,7 +15643,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
|
| *
|
| * parse an XML document from I/O functions and source and build a tree.
|
| * This reuses the existing @ctxt parser context
|
| - *
|
| + *
|
| * Returns the resulting document tree
|
| */
|
| xmlDocPtr
|
| @@ -14994,13 +15659,17 @@ xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
|
| return (NULL);
|
| if (ctxt == NULL)
|
| return (NULL);
|
| + xmlInitParser();
|
|
|
| xmlCtxtReset(ctxt);
|
|
|
| input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
| XML_CHAR_ENCODING_NONE);
|
| - if (input == NULL)
|
| + if (input == NULL) {
|
| + if (ioclose != NULL)
|
| + ioclose(ioctx);
|
| return (NULL);
|
| + }
|
| stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
| if (stream == NULL) {
|
| xmlFreeParserInputBuffer(input);
|
|
|