| Index: third_party/libxml/xmlregexp.c
|
| diff --git a/third_party/libxml/xmlregexp.c b/third_party/libxml/xmlregexp.c
|
| index 2f8ae3dd2598312629429ff4481599f609c60f15..feae87460ed5b58923b78c9dd76ed3deb98a4999 100644
|
| --- a/third_party/libxml/xmlregexp.c
|
| +++ b/third_party/libxml/xmlregexp.c
|
| @@ -233,6 +233,8 @@ struct _xmlAutomataState {
|
| typedef struct _xmlAutomata xmlRegParserCtxt;
|
| typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
|
|
|
| +#define AM_AUTOMATA_RNG 1
|
| +
|
| struct _xmlAutomata {
|
| xmlChar *string;
|
| xmlChar *cur;
|
| @@ -260,6 +262,7 @@ struct _xmlAutomata {
|
|
|
| int determinist;
|
| int negs;
|
| + int flags;
|
| };
|
|
|
| struct _xmlRegexp {
|
| @@ -271,6 +274,7 @@ struct _xmlRegexp {
|
| int nbCounters;
|
| xmlRegCounter *counters;
|
| int determinist;
|
| + int flags;
|
| /*
|
| * That's the compact form for determinists automatas
|
| */
|
| @@ -353,6 +357,8 @@ static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
|
| static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
|
| int neg, int start, int end, const xmlChar *blockName);
|
|
|
| +void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
|
| +
|
| /************************************************************************
|
| * *
|
| * Regexp memory error handler *
|
| @@ -434,6 +440,7 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
|
| ret->nbCounters = ctxt->nbCounters;
|
| ret->counters = ctxt->counters;
|
| ret->determinist = ctxt->determinist;
|
| + ret->flags = ctxt->flags;
|
| if (ret->determinist == -1) {
|
| xmlRegexpIsDeterminist(ret);
|
| }
|
| @@ -1569,8 +1576,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
|
| * 1. set transition from atom start to new state
|
| * 2. set transition from atom end to this state.
|
| */
|
| - xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
|
| - xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state);
|
| + if (to == NULL) {
|
| + xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
|
| + xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
|
| + ctxt->state);
|
| + } else {
|
| + xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
|
| + }
|
| break;
|
| case XML_REGEXP_QUANT_MULT:
|
| atom->quant = XML_REGEXP_QUANT_ONCE;
|
| @@ -2083,12 +2095,13 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
|
| (range2->type == XML_REGEXP_EPSILON)) {
|
| return(0);
|
| } else if (range1->type == range2->type) {
|
| - if ((range1->type != XML_REGEXP_CHARVAL) ||
|
| - (range1->end < range2->start) ||
|
| - (range2->end < range1->start))
|
| - ret = 1;
|
| - else
|
| + if (range1->type != XML_REGEXP_CHARVAL)
|
| + ret = 1;
|
| + else if ((range1->end < range2->start) ||
|
| + (range2->end < range1->start))
|
| ret = 0;
|
| + else
|
| + ret = 1;
|
| } else if (range1->type == XML_REGEXP_CHARVAL) {
|
| int codepoint;
|
| int neg = 0;
|
| @@ -2215,7 +2228,7 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
|
| if (((range1->neg == 0) && (range2->neg != 0)) ||
|
| ((range1->neg != 0) && (range2->neg == 0)))
|
| ret = !ret;
|
| - return(1);
|
| + return(ret);
|
| }
|
|
|
| /**
|
| @@ -2423,6 +2436,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
|
| * xmlFAEqualAtoms:
|
| * @atom1: an atom
|
| * @atom2: an atom
|
| + * @deep: if not set only compare string pointers
|
| *
|
| * Compares two atoms to check whether they are the same exactly
|
| * this is used to remove equivalent transitions
|
| @@ -2430,7 +2444,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
|
| * Returns 1 if same and 0 otherwise
|
| */
|
| static int
|
| -xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
|
| +xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
|
| int ret = 0;
|
|
|
| if (atom1 == atom2)
|
| @@ -2445,8 +2459,11 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
|
| ret = 0;
|
| break;
|
| case XML_REGEXP_STRING:
|
| - ret = xmlStrEqual((xmlChar *)atom1->valuep,
|
| - (xmlChar *)atom2->valuep);
|
| + if (!deep)
|
| + ret = (atom1->valuep == atom2->valuep);
|
| + else
|
| + ret = xmlStrEqual((xmlChar *)atom1->valuep,
|
| + (xmlChar *)atom2->valuep);
|
| break;
|
| case XML_REGEXP_CHARVAL:
|
| ret = (atom1->codepoint == atom2->codepoint);
|
| @@ -2464,6 +2481,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
|
| * xmlFACompareAtoms:
|
| * @atom1: an atom
|
| * @atom2: an atom
|
| + * @deep: if not set only compare string pointers
|
| *
|
| * Compares two atoms to check whether they intersect in some ways,
|
| * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
|
| @@ -2471,7 +2489,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
|
| * Returns 1 if yes and 0 otherwise
|
| */
|
| static int
|
| -xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
|
| +xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
|
| int ret = 1;
|
|
|
| if (atom1 == atom2)
|
| @@ -2497,8 +2515,11 @@ xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
|
| }
|
| switch (atom1->type) {
|
| case XML_REGEXP_STRING:
|
| - ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
|
| - (xmlChar *)atom2->valuep);
|
| + if (!deep)
|
| + ret = (atom1->valuep != atom2->valuep);
|
| + else
|
| + ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
|
| + (xmlChar *)atom2->valuep);
|
| break;
|
| case XML_REGEXP_EPSILON:
|
| goto not_determinist;
|
| @@ -2561,9 +2582,14 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
|
| int res;
|
| int transnr, nbTrans;
|
| xmlRegTransPtr t1;
|
| + int deep = 1;
|
|
|
| if (state == NULL)
|
| return(ret);
|
| +
|
| + if (ctxt->flags & AM_AUTOMATA_RNG)
|
| + deep = 0;
|
| +
|
| /*
|
| * don't recurse on transitions potentially added in the course of
|
| * the elimination.
|
| @@ -2587,7 +2613,7 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
|
| }
|
| if (t1->to != to)
|
| continue;
|
| - if (xmlFACompareAtoms(t1->atom, atom)) {
|
| + if (xmlFACompareAtoms(t1->atom, atom, deep)) {
|
| ret = 0;
|
| /* mark the transition as non-deterministic */
|
| t1->nd = 1;
|
| @@ -2611,6 +2637,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
|
| xmlRegTransPtr t1, t2, last;
|
| int i;
|
| int ret = 1;
|
| + int deep = 1;
|
|
|
| #ifdef DEBUG_REGEXP_GRAPH
|
| printf("xmlFAComputesDeterminism\n");
|
| @@ -2619,6 +2646,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
|
| if (ctxt->determinist != -1)
|
| return(ctxt->determinist);
|
|
|
| + if (ctxt->flags & AM_AUTOMATA_RNG)
|
| + deep = 0;
|
| +
|
| /*
|
| * First cleanup the automata removing cancelled transitions
|
| */
|
| @@ -2646,7 +2676,13 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
|
| continue;
|
| if (t2->atom != NULL) {
|
| if (t1->to == t2->to) {
|
| - if (xmlFAEqualAtoms(t1->atom, t2->atom))
|
| + /*
|
| + * Here we use deep because we want to keep the
|
| + * transitions which indicate a conflict
|
| + */
|
| + if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
|
| + (t1->counter == t2->counter) &&
|
| + (t1->count == t2->count))
|
| t2->to = -1; /* eliminated */
|
| }
|
| }
|
| @@ -2681,8 +2717,11 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
|
| if (t2->to == -1) /* eliminated */
|
| continue;
|
| if (t2->atom != NULL) {
|
| - /* not determinist ! */
|
| - if (xmlFACompareAtoms(t1->atom, t2->atom)) {
|
| + /*
|
| + * But here we don't use deep because we want to
|
| + * find transitions which indicate a conflict
|
| + */
|
| + if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
|
| ret = 0;
|
| /* mark the transitions as non-deterministic ones */
|
| t1->nd = 1;
|
| @@ -3162,7 +3201,8 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
|
| exec->counts = NULL;
|
| while ((exec->status == 0) &&
|
| ((exec->inputString[exec->index] != 0) ||
|
| - (exec->state->type != XML_REGEXP_FINAL_STATE))) {
|
| + ((exec->state != NULL) &&
|
| + (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
|
| xmlRegTransPtr trans;
|
| xmlRegAtomPtr atom;
|
|
|
| @@ -4852,6 +4892,17 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
|
| }
|
| }
|
| } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
|
| + switch (cur) {
|
| + case 'n':
|
| + cur = '\n';
|
| + break;
|
| + case 'r':
|
| + cur = '\r';
|
| + break;
|
| + case 't':
|
| + cur = '\t';
|
| + break;
|
| + }
|
| xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
|
| XML_REGEXP_CHARVAL, cur, cur, NULL);
|
| }
|
| @@ -4906,64 +4957,6 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
|
| }
|
|
|
| /**
|
| - * xmlFAParseCharRef:
|
| - * @ctxt: a regexp parser context
|
| - *
|
| - * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
|
| - */
|
| -static int
|
| -xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
|
| - int ret = 0, cur;
|
| -
|
| - if ((CUR != '&') || (NXT(1) != '#'))
|
| - return(-1);
|
| - NEXT;
|
| - NEXT;
|
| - cur = CUR;
|
| - if (cur == 'x') {
|
| - NEXT;
|
| - cur = CUR;
|
| - if (((cur >= '0') && (cur <= '9')) ||
|
| - ((cur >= 'a') && (cur <= 'f')) ||
|
| - ((cur >= 'A') && (cur <= 'F'))) {
|
| - while (((cur >= '0') && (cur <= '9')) ||
|
| - ((cur >= 'a') && (cur <= 'f')) ||
|
| - ((cur >= 'A') && (cur <= 'F'))) {
|
| - if ((cur >= '0') && (cur <= '9'))
|
| - ret = ret * 16 + cur - '0';
|
| - else if ((cur >= 'a') && (cur <= 'f'))
|
| - ret = ret * 16 + 10 + (cur - 'a');
|
| - else
|
| - ret = ret * 16 + 10 + (cur - 'A');
|
| - NEXT;
|
| - cur = CUR;
|
| - }
|
| - } else {
|
| - ERROR("Char ref: expecting [0-9A-F]");
|
| - return(-1);
|
| - }
|
| - } else {
|
| - if ((cur >= '0') && (cur <= '9')) {
|
| - while ((cur >= '0') && (cur <= '9')) {
|
| - ret = ret * 10 + cur - '0';
|
| - NEXT;
|
| - cur = CUR;
|
| - }
|
| - } else {
|
| - ERROR("Char ref: expecting [0-9]");
|
| - return(-1);
|
| - }
|
| - }
|
| - if (cur != ';') {
|
| - ERROR("Char ref: expecting ';'");
|
| - return(-1);
|
| - } else {
|
| - NEXT;
|
| - }
|
| - return(ret);
|
| -}
|
| -
|
| -/**
|
| * xmlFAParseCharRange:
|
| * @ctxt: a regexp parser context
|
| *
|
| @@ -4984,12 +4977,6 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
|
| return;
|
| }
|
|
|
| - if ((CUR == '&') && (NXT(1) == '#')) {
|
| - end = start = xmlFAParseCharRef(ctxt);
|
| - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
|
| - XML_REGEXP_CHARVAL, start, end, NULL);
|
| - return;
|
| - }
|
| cur = CUR;
|
| if (cur == '\\') {
|
| NEXT;
|
| @@ -5533,10 +5520,12 @@ xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
|
| am->nbStates = comp->nbStates;
|
| am->states = comp->states;
|
| am->determinist = -1;
|
| + am->flags = comp->flags;
|
| ret = xmlFAComputesDeterminism(am);
|
| am->atoms = NULL;
|
| am->states = NULL;
|
| xmlFreeAutomata(am);
|
| + comp->determinist = ret;
|
| return(ret);
|
| }
|
|
|
| @@ -5614,6 +5603,7 @@ xmlNewAutomata(void) {
|
| xmlFreeAutomata(ctxt);
|
| return(NULL);
|
| }
|
| + ctxt->flags = 0;
|
|
|
| return(ctxt);
|
| }
|
| @@ -5632,6 +5622,20 @@ xmlFreeAutomata(xmlAutomataPtr am) {
|
| }
|
|
|
| /**
|
| + * xmlAutomataSetFlags:
|
| + * @am: an automata
|
| + * @flags: a set of internal flags
|
| + *
|
| + * Set some flags on the automata
|
| + */
|
| +void
|
| +xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
|
| + if (am == NULL)
|
| + return;
|
| + am->flags |= flags;
|
| +}
|
| +
|
| +/**
|
| * xmlAutomataGetInitState:
|
| * @am: an automata
|
| *
|
| @@ -6317,6 +6321,7 @@ struct _xmlExpCtxt {
|
| int size;
|
| int nbElems;
|
| int nb_nodes;
|
| + int maxNodes;
|
| const char *expr;
|
| const char *cur;
|
| int nb_cons;
|
| @@ -6346,6 +6351,7 @@ xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
|
| memset(ret, 0, sizeof(xmlExpCtxt));
|
| ret->size = size;
|
| ret->nbElems = 0;
|
| + ret->maxNodes = maxNodes;
|
| ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
|
| if (ret->table == NULL) {
|
| xmlFree(ret);
|
| @@ -6931,7 +6937,7 @@ tail:
|
| return(0);
|
| if (nb >= len)
|
| return(-2);
|
| - list[nb++] = exp->exp_str;
|
| + list[nb] = exp->exp_str;
|
| return(1);
|
| case XML_EXP_COUNT:
|
| exp = exp->exp_left;
|
| @@ -6986,7 +6992,7 @@ tail:
|
| return(0);
|
| if (nb >= len)
|
| return(-2);
|
| - list[nb++] = exp->exp_str;
|
| + list[nb] = exp->exp_str;
|
| return(1);
|
| case XML_EXP_COUNT:
|
| exp = exp->exp_left;
|
|
|