Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(985)

Side by Side Diff: third_party/libxml/src/xmlregexp.c

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: no iconv Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libxml/src/xmlreader.c ('k') | third_party/libxml/src/xmlsave.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * regexp.c: generic and extensible Regular Expression engine 2 * regexp.c: generic and extensible Regular Expression engine
3 * 3 *
4 * Basically designed with the purpose of compiling regexps for 4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in 5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications these include: 6 * XML related specifications these include:
7 * - XML-1.0 DTD validation 7 * - XML-1.0 DTD validation
8 * - XML Schemas structure part 1 8 * - XML Schemas structure part 1
9 * - XML Schemas Datatypes part 2 especially Appendix F 9 * - XML Schemas Datatypes part 2 especially Appendix F
10 * - RELAX-NG/TREX i.e. the counter proposal 10 * - RELAX-NG/TREX i.e. the counter proposal
11 * 11 *
12 * See Copyright for the status of this software. 12 * See Copyright for the status of this software.
13 * 13 *
14 * Daniel Veillard <veillard@redhat.com> 14 * Daniel Veillard <veillard@redhat.com>
(...skipping 22 matching lines...) Expand all
37 #define INT_MAX 123456789 /* easy to flag and big enough for our needs */ 37 #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
38 #endif 38 #endif
39 39
40 /* #define DEBUG_REGEXP_GRAPH */ 40 /* #define DEBUG_REGEXP_GRAPH */
41 /* #define DEBUG_REGEXP_EXEC */ 41 /* #define DEBUG_REGEXP_EXEC */
42 /* #define DEBUG_PUSH */ 42 /* #define DEBUG_PUSH */
43 /* #define DEBUG_COMPACTION */ 43 /* #define DEBUG_COMPACTION */
44 44
45 #define MAX_PUSH 10000000 45 #define MAX_PUSH 10000000
46 46
47 #ifdef ERROR
48 #undef ERROR
49 #endif
47 #define ERROR(str) \ 50 #define ERROR(str) \
48 ctxt->error = XML_REGEXP_COMPILE_ERROR; \ 51 ctxt->error = XML_REGEXP_COMPILE_ERROR; \
49 xmlRegexpErrCompile(ctxt, str); 52 xmlRegexpErrCompile(ctxt, str);
50 #define NEXT ctxt->cur++ 53 #define NEXT ctxt->cur++
51 #define CUR (*(ctxt->cur)) 54 #define CUR (*(ctxt->cur))
52 #define NXT(index) (ctxt->cur[index]) 55 #define NXT(index) (ctxt->cur[index])
53 56
54 #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) 57 #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
55 #define NEXTL(l) ctxt->cur += l; 58 #define NEXTL(l) ctxt->cur += l;
56 #define XML_REG_STRING_SEPARATOR '|' 59 #define XML_REG_STRING_SEPARATOR '|'
57 /* 60 /*
58 * Need PREV to check on a '-' within a Character Group. May only be used 61 * Need PREV to check on a '-' within a Character Group. May only be used
59 * when it's guaranteed that cur is not at the beginning of ctxt->string! 62 * when it's guaranteed that cur is not at the beginning of ctxt->string!
60 */ 63 */
61 #define PREV (ctxt->cur[-1]) 64 #define PREV (ctxt->cur[-1])
62 65
63 /** 66 /**
64 * TODO: 67 * TODO:
65 * 68 *
66 * macro to flag unimplemented blocks 69 * macro to flag unimplemented blocks
67 */ 70 */
68 #define TODO » » » » » » » » \ 71 #define TODO» » » » » » » » \
69 xmlGenericError(xmlGenericErrorContext, \ 72 xmlGenericError(xmlGenericErrorContext, \
70 "Unimplemented block at %s:%d\n", \ 73 "Unimplemented block at %s:%d\n", \
71 __FILE__, __LINE__); 74 __FILE__, __LINE__);
72 75
73 /************************************************************************ 76 /************************************************************************
74 * » » » » » » » » » * 77 *» » » » » » » » » *
75 * » » » Datatypes and structures» » » * 78 *» » » Datatypes and structures» » » *
76 * » » » » » » » » » * 79 *» » » » » » » » » *
77 ************************************************************************/ 80 ************************************************************************/
78 81
79 /* 82 /*
80 * Note: the order of the enums below is significant, do not shuffle 83 * Note: the order of the enums below is significant, do not shuffle
81 */ 84 */
82 typedef enum { 85 typedef enum {
83 XML_REGEXP_EPSILON = 1, 86 XML_REGEXP_EPSILON = 1,
84 XML_REGEXP_CHARVAL, 87 XML_REGEXP_CHARVAL,
85 XML_REGEXP_RANGES, 88 XML_REGEXP_RANGES,
86 XML_REGEXP_SUBREG, /* used for () sub regexps */ 89 XML_REGEXP_SUBREG, /* used for () sub regexps */
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
212 xmlRegAtomPtr atom; 215 xmlRegAtomPtr atom;
213 int to; 216 int to;
214 int counter; 217 int counter;
215 int count; 218 int count;
216 int nd; 219 int nd;
217 }; 220 };
218 221
219 struct _xmlAutomataState { 222 struct _xmlAutomataState {
220 xmlRegStateType type; 223 xmlRegStateType type;
221 xmlRegMarkedType mark; 224 xmlRegMarkedType mark;
225 xmlRegMarkedType markd;
222 xmlRegMarkedType reached; 226 xmlRegMarkedType reached;
223 int no; 227 int no;
224 int maxTrans; 228 int maxTrans;
225 int nbTrans; 229 int nbTrans;
226 xmlRegTrans *trans; 230 xmlRegTrans *trans;
227 /* knowing states ponting to us can speed things up */ 231 /* knowing states ponting to us can speed things up */
228 int maxTransTo; 232 int maxTransTo;
229 int nbTransTo; 233 int nbTransTo;
230 int *transTo; 234 int *transTo;
231 }; 235 };
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
354 static void xmlRegFreeAtom(xmlRegAtomPtr atom); 358 static void xmlRegFreeAtom(xmlRegAtomPtr atom);
355 static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr); 359 static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
356 static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint); 360 static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
357 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, 361 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
358 int neg, int start, int end, const xmlChar *blockName); 362 int neg, int start, int end, const xmlChar *blockName);
359 363
360 void xmlAutomataSetFlags(xmlAutomataPtr am, int flags); 364 void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
361 365
362 /************************************************************************ 366 /************************************************************************
363 * * 367 * *
364 * » » Regexp memory error handler» » » » * 368 *» » Regexp memory error handler» » » » *
365 * * 369 * *
366 ************************************************************************/ 370 ************************************************************************/
367 /** 371 /**
368 * xmlRegexpErrMemory: 372 * xmlRegexpErrMemory:
369 * @extra: extra information 373 * @extra: extra information
370 * 374 *
371 * Handle an out of memory condition 375 * Handle an out of memory condition
372 */ 376 */
373 static void 377 static void
374 xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra) 378 xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
(...skipping 26 matching lines...) Expand all
401 idx = ctxt->cur - ctxt->string; 405 idx = ctxt->cur - ctxt->string;
402 ctxt->error = XML_REGEXP_COMPILE_ERROR; 406 ctxt->error = XML_REGEXP_COMPILE_ERROR;
403 } 407 }
404 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, 408 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
405 XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra, 409 XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
406 regexp, NULL, idx, 0, 410 regexp, NULL, idx, 0,
407 "failed to compile: %s\n", extra); 411 "failed to compile: %s\n", extra);
408 } 412 }
409 413
410 /************************************************************************ 414 /************************************************************************
411 * » » » » » » » » » * 415 *» » » » » » » » » *
412 * » » » Allocation/Deallocation»» » » * 416 *» » » Allocation/Deallocation»» » » *
413 * » » » » » » » » » * 417 *» » » » » » » » » *
414 ************************************************************************/ 418 ************************************************************************/
415 419
416 static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); 420 static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
417 /** 421 /**
418 * xmlRegEpxFromParse: 422 * xmlRegEpxFromParse:
419 * @ctxt: the parser context used to build it 423 * @ctxt: the parser context used to build it
420 * 424 *
421 * Allocate a new regexp and fill it with the result from the parser 425 * Allocate a new regexp and fill it with the result from the parser
422 * 426 *
423 * Returns the new regexp or NULL in case of error 427 * Returns the new regexp or NULL in case of error
(...skipping 500 matching lines...) Expand 10 before | Expand all | Expand 10 after
924 for (i = 0;i < ctxt->nbAtoms;i++) 928 for (i = 0;i < ctxt->nbAtoms;i++)
925 xmlRegFreeAtom(ctxt->atoms[i]); 929 xmlRegFreeAtom(ctxt->atoms[i]);
926 xmlFree(ctxt->atoms); 930 xmlFree(ctxt->atoms);
927 } 931 }
928 if (ctxt->counters != NULL) 932 if (ctxt->counters != NULL)
929 xmlFree(ctxt->counters); 933 xmlFree(ctxt->counters);
930 xmlFree(ctxt); 934 xmlFree(ctxt);
931 } 935 }
932 936
933 /************************************************************************ 937 /************************************************************************
934 * » » » » » » » » » * 938 *» » » » » » » » » *
935 * » » » Display of Data structures» » » * 939 *» » » Display of Data structures» » » *
936 * » » » » » » » » » * 940 *» » » » » » » » » *
937 ************************************************************************/ 941 ************************************************************************/
938 942
939 static void 943 static void
940 xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { 944 xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
941 switch (type) { 945 switch (type) {
942 case XML_REGEXP_EPSILON: 946 case XML_REGEXP_EPSILON:
943 fprintf(output, "epsilon "); break; 947 fprintf(output, "epsilon "); break;
944 case XML_REGEXP_CHARVAL: 948 case XML_REGEXP_CHARVAL:
945 fprintf(output, "charval "); break; 949 fprintf(output, "charval "); break;
946 case XML_REGEXP_RANGES: 950 case XML_REGEXP_RANGES:
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after
1133 fprintf(output, "count based %d, ", trans->count); 1137 fprintf(output, "count based %d, ", trans->count);
1134 } 1138 }
1135 if (trans->atom == NULL) { 1139 if (trans->atom == NULL) {
1136 fprintf(output, "epsilon to %d\n", trans->to); 1140 fprintf(output, "epsilon to %d\n", trans->to);
1137 return; 1141 return;
1138 } 1142 }
1139 if (trans->atom->type == XML_REGEXP_CHARVAL) 1143 if (trans->atom->type == XML_REGEXP_CHARVAL)
1140 fprintf(output, "char %c ", trans->atom->codepoint); 1144 fprintf(output, "char %c ", trans->atom->codepoint);
1141 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); 1145 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1142 } 1146 }
1143 1147
1144 static void 1148 static void
1145 xmlRegPrintState(FILE *output, xmlRegStatePtr state) { 1149 xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1146 int i; 1150 int i;
1147 1151
1148 fprintf(output, " state: "); 1152 fprintf(output, " state: ");
1149 if (state == NULL) { 1153 if (state == NULL) {
1150 fprintf(output, "NULL\n"); 1154 fprintf(output, "NULL\n");
1151 return; 1155 return;
1152 } 1156 }
1153 if (state->type == XML_REGEXP_START_STATE) 1157 if (state->type == XML_REGEXP_START_STATE)
1154 fprintf(output, "START "); 1158 fprintf(output, "START ");
1155 if (state->type == XML_REGEXP_FINAL_STATE) 1159 if (state->type == XML_REGEXP_FINAL_STATE)
1156 fprintf(output, "FINAL "); 1160 fprintf(output, "FINAL ");
1157 1161
1158 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); 1162 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1159 for (i = 0;i < state->nbTrans; i++) { 1163 for (i = 0;i < state->nbTrans; i++) {
1160 xmlRegPrintTrans(output, &(state->trans[i])); 1164 xmlRegPrintTrans(output, &(state->trans[i]));
1161 } 1165 }
1162 } 1166 }
1163 1167
1164 #ifdef DEBUG_REGEXP_GRAPH 1168 #ifdef DEBUG_REGEXP_GRAPH
1165 static void 1169 static void
1166 xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { 1170 xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1167 int i; 1171 int i;
(...skipping 29 matching lines...) Expand all
1197 } 1201 }
1198 fprintf(output, "%d counters:\n", ctxt->nbCounters); 1202 fprintf(output, "%d counters:\n", ctxt->nbCounters);
1199 for (i = 0;i < ctxt->nbCounters; i++) { 1203 for (i = 0;i < ctxt->nbCounters; i++) {
1200 fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, 1204 fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1201 ctxt->counters[i].max); 1205 ctxt->counters[i].max);
1202 } 1206 }
1203 } 1207 }
1204 #endif 1208 #endif
1205 1209
1206 /************************************************************************ 1210 /************************************************************************
1207 * » » » » » » » » » * 1211 *» » » » » » » » » *
1208 * Finite Automata structures manipulations * 1212 * Finite Automata structures manipulations *
1209 * » » » » » » » » » * 1213 *» » » » » » » » » *
1210 ************************************************************************/ 1214 ************************************************************************/
1211 1215
1212 static void 1216 static void
1213 xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, 1217 xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1214 int neg, xmlRegAtomType type, int start, int end, 1218 int neg, xmlRegAtomType type, int start, int end,
1215 xmlChar *blockName) { 1219 xmlChar *blockName) {
1216 xmlRegRangePtr range; 1220 xmlRegRangePtr range;
1217 1221
1218 if (atom == NULL) { 1222 if (atom == NULL) {
1219 ERROR("add range: atom is NULL"); 1223 ERROR("add range: atom is NULL");
1220 return; 1224 return;
1221 } 1225 }
1222 if (atom->type != XML_REGEXP_RANGES) { 1226 if (atom->type != XML_REGEXP_RANGES) {
(...skipping 19 matching lines...) Expand all
1242 atom->maxRanges /= 2; 1246 atom->maxRanges /= 2;
1243 return; 1247 return;
1244 } 1248 }
1245 atom->ranges = tmp; 1249 atom->ranges = tmp;
1246 } 1250 }
1247 range = xmlRegNewRange(ctxt, neg, type, start, end); 1251 range = xmlRegNewRange(ctxt, neg, type, start, end);
1248 if (range == NULL) 1252 if (range == NULL)
1249 return; 1253 return;
1250 range->blockName = blockName; 1254 range->blockName = blockName;
1251 atom->ranges[atom->nbRanges++] = range; 1255 atom->ranges[atom->nbRanges++] = range;
1252 1256
1253 } 1257 }
1254 1258
1255 static int 1259 static int
1256 xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { 1260 xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1257 if (ctxt->maxCounters == 0) { 1261 if (ctxt->maxCounters == 0) {
1258 ctxt->maxCounters = 4; 1262 ctxt->maxCounters = 4;
1259 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * 1263 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1260 sizeof(xmlRegCounter)); 1264 sizeof(xmlRegCounter));
1261 if (ctxt->counters == NULL) { 1265 if (ctxt->counters == NULL) {
1262 xmlRegexpErrMemory(ctxt, "allocating counter"); 1266 xmlRegexpErrMemory(ctxt, "allocating counter");
(...skipping 10 matching lines...) Expand all
1273 ctxt->maxCounters /= 2; 1277 ctxt->maxCounters /= 2;
1274 return(-1); 1278 return(-1);
1275 } 1279 }
1276 ctxt->counters = tmp; 1280 ctxt->counters = tmp;
1277 } 1281 }
1278 ctxt->counters[ctxt->nbCounters].min = -1; 1282 ctxt->counters[ctxt->nbCounters].min = -1;
1279 ctxt->counters[ctxt->nbCounters].max = -1; 1283 ctxt->counters[ctxt->nbCounters].max = -1;
1280 return(ctxt->nbCounters++); 1284 return(ctxt->nbCounters++);
1281 } 1285 }
1282 1286
1283 static int 1287 static int
1284 xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { 1288 xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1285 if (atom == NULL) { 1289 if (atom == NULL) {
1286 ERROR("atom push: atom is NULL"); 1290 ERROR("atom push: atom is NULL");
1287 return(-1); 1291 return(-1);
1288 } 1292 }
1289 if (ctxt->maxAtoms == 0) { 1293 if (ctxt->maxAtoms == 0) {
1290 ctxt->maxAtoms = 4; 1294 ctxt->maxAtoms = 4;
1291 ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * 1295 ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1292 sizeof(xmlRegAtomPtr)); 1296 sizeof(xmlRegAtomPtr));
1293 if (ctxt->atoms == NULL) { 1297 if (ctxt->atoms == NULL) {
(...skipping 11 matching lines...) Expand all
1305 ctxt->maxAtoms /= 2; 1309 ctxt->maxAtoms /= 2;
1306 return(-1); 1310 return(-1);
1307 } 1311 }
1308 ctxt->atoms = tmp; 1312 ctxt->atoms = tmp;
1309 } 1313 }
1310 atom->no = ctxt->nbAtoms; 1314 atom->no = ctxt->nbAtoms;
1311 ctxt->atoms[ctxt->nbAtoms++] = atom; 1315 ctxt->atoms[ctxt->nbAtoms++] = atom;
1312 return(0); 1316 return(0);
1313 } 1317 }
1314 1318
1315 static void 1319 static void
1316 xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target, 1320 xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1317 int from) { 1321 int from) {
1318 if (target->maxTransTo == 0) { 1322 if (target->maxTransTo == 0) {
1319 target->maxTransTo = 8; 1323 target->maxTransTo = 8;
1320 target->transTo = (int *) xmlMalloc(target->maxTransTo * 1324 target->transTo = (int *) xmlMalloc(target->maxTransTo *
1321 sizeof(int)); 1325 sizeof(int));
1322 if (target->transTo == NULL) { 1326 if (target->transTo == NULL) {
1323 xmlRegexpErrMemory(ctxt, "adding transition"); 1327 xmlRegexpErrMemory(ctxt, "adding transition");
1324 target->maxTransTo = 0; 1328 target->maxTransTo = 0;
1325 return; 1329 return;
1326 } 1330 }
1327 } else if (target->nbTransTo >= target->maxTransTo) { 1331 } else if (target->nbTransTo >= target->maxTransTo) {
1328 int *tmp; 1332 int *tmp;
1329 target->maxTransTo *= 2; 1333 target->maxTransTo *= 2;
1330 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo * 1334 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1331 sizeof(int)); 1335 sizeof(int));
1332 if (tmp == NULL) { 1336 if (tmp == NULL) {
1333 xmlRegexpErrMemory(ctxt, "adding transition"); 1337 xmlRegexpErrMemory(ctxt, "adding transition");
1334 target->maxTransTo /= 2; 1338 target->maxTransTo /= 2;
1335 return; 1339 return;
1336 } 1340 }
1337 target->transTo = tmp; 1341 target->transTo = tmp;
1338 } 1342 }
1339 target->transTo[target->nbTransTo] = from; 1343 target->transTo[target->nbTransTo] = from;
1340 target->nbTransTo++; 1344 target->nbTransTo++;
1341 } 1345 }
1342 1346
1343 static void 1347 static void
1344 xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 1348 xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1345 xmlRegAtomPtr atom, xmlRegStatePtr target, 1349 xmlRegAtomPtr atom, xmlRegStatePtr target,
1346 int counter, int count) { 1350 int counter, int count) {
1347 1351
1348 int nrtrans; 1352 int nrtrans;
1349 1353
1350 if (state == NULL) { 1354 if (state == NULL) {
1351 ERROR("add state: state is NULL"); 1355 ERROR("add state: state is NULL");
1352 return; 1356 return;
1353 } 1357 }
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
1399 #ifdef DEBUG_REGEXP_GRAPH 1403 #ifdef DEBUG_REGEXP_GRAPH
1400 printf("Add trans from %d to %d ", state->no, target->no); 1404 printf("Add trans from %d to %d ", state->no, target->no);
1401 if (count == REGEXP_ALL_COUNTER) 1405 if (count == REGEXP_ALL_COUNTER)
1402 printf("all transition\n"); 1406 printf("all transition\n");
1403 else if (count >= 0) 1407 else if (count >= 0)
1404 printf("count based %d\n", count); 1408 printf("count based %d\n", count);
1405 else if (counter >= 0) 1409 else if (counter >= 0)
1406 printf("counted %d\n", counter); 1410 printf("counted %d\n", counter);
1407 else if (atom == NULL) 1411 else if (atom == NULL)
1408 printf("epsilon transition\n"); 1412 printf("epsilon transition\n");
1409 else if (atom != NULL) 1413 else if (atom != NULL)
1410 xmlRegPrintAtom(stdout, atom); 1414 xmlRegPrintAtom(stdout, atom);
1411 #endif 1415 #endif
1412 1416
1413 state->trans[state->nbTrans].atom = atom; 1417 state->trans[state->nbTrans].atom = atom;
1414 state->trans[state->nbTrans].to = target->no; 1418 state->trans[state->nbTrans].to = target->no;
1415 state->trans[state->nbTrans].counter = counter; 1419 state->trans[state->nbTrans].counter = counter;
1416 state->trans[state->nbTrans].count = count; 1420 state->trans[state->nbTrans].count = count;
1417 state->trans[state->nbTrans].nd = 0; 1421 state->trans[state->nbTrans].nd = 0;
1418 state->nbTrans++; 1422 state->nbTrans++;
1419 xmlRegStateAddTransTo(ctxt, target, state->no); 1423 xmlRegStateAddTransTo(ctxt, target, state->no);
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
1553 if (xmlRegAtomPush(ctxt, atom) < 0) { 1557 if (xmlRegAtomPush(ctxt, atom) < 0) {
1554 return(-1); 1558 return(-1);
1555 } 1559 }
1556 if ((to != NULL) && (atom->stop != to) && 1560 if ((to != NULL) && (atom->stop != to) &&
1557 (atom->quant != XML_REGEXP_QUANT_RANGE)) { 1561 (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1558 /* 1562 /*
1559 * Generate an epsilon transition to link to the target 1563 * Generate an epsilon transition to link to the target
1560 */ 1564 */
1561 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); 1565 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1562 #ifdef DV 1566 #ifdef DV
1563 » } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 1567 » } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1564 (atom->quant != XML_REGEXP_QUANT_ONCE)) { 1568 (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1565 to = xmlRegNewState(ctxt); 1569 to = xmlRegNewState(ctxt);
1566 xmlRegStatePush(ctxt, to); 1570 xmlRegStatePush(ctxt, to);
1567 ctxt->state = to; 1571 ctxt->state = to;
1568 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); 1572 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1569 #endif 1573 #endif
1570 } 1574 }
1571 switch (atom->quant) { 1575 switch (atom->quant) {
1572 case XML_REGEXP_QUANT_OPT: 1576 case XML_REGEXP_QUANT_OPT:
1573 atom->quant = XML_REGEXP_QUANT_ONCE; 1577 atom->quant = XML_REGEXP_QUANT_ONCE;
1574 /* 1578 /*
1575 * transition done to the state after end of atom. 1579 * transition done to the state after end of atom.
1576 * 1. set transition from atom start to new state 1580 * 1. set transition from atom start to new state
1577 » » * 2. set transition from atom end to this state. 1581 » » * 2. set transition from atom end to this state.
1578 */ 1582 */
1579 if (to == NULL) { 1583 if (to == NULL) {
1580 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0); 1584 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1581 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, 1585 xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1582 ctxt->state); 1586 ctxt->state);
1583 } else { 1587 } else {
1584 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to); 1588 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1585 } 1589 }
1586 break; 1590 break;
1587 case XML_REGEXP_QUANT_MULT: 1591 case XML_REGEXP_QUANT_MULT:
(...skipping 23 matching lines...) Expand all
1611 * The principle here is to use counted transition 1615 * The principle here is to use counted transition
1612 * to avoid explosion in the number of states in the 1616 * to avoid explosion in the number of states in the
1613 * graph. This is clearly more complex but should not 1617 * graph. This is clearly more complex but should not
1614 * be exploitable at runtime. 1618 * be exploitable at runtime.
1615 */ 1619 */
1616 if ((atom->min == 0) && (atom->start0 == NULL)) { 1620 if ((atom->min == 0) && (atom->start0 == NULL)) {
1617 xmlRegAtomPtr copy; 1621 xmlRegAtomPtr copy;
1618 /* 1622 /*
1619 * duplicate a transition based on atom to count next 1623 * duplicate a transition based on atom to count next
1620 * occurences after 1. We cannot loop to atom->start 1624 * occurences after 1. We cannot loop to atom->start
1621 » » * directly because we need an epsilon transition to 1625 » » * directly because we need an epsilon transition to
1622 * newstate. 1626 * newstate.
1623 */ 1627 */
1624 /* ???? For some reason it seems we never reach that 1628 /* ???? For some reason it seems we never reach that
1625 case, I suppose this got optimized out before when 1629 case, I suppose this got optimized out before when
1626 building the automata */ 1630 building the automata */
1627 copy = xmlRegCopyAtom(ctxt, atom); 1631 copy = xmlRegCopyAtom(ctxt, atom);
1628 if (copy == NULL) 1632 if (copy == NULL)
1629 return(-1); 1633 return(-1);
1630 copy->quant = XML_REGEXP_QUANT_ONCE; 1634 copy->quant = XML_REGEXP_QUANT_ONCE;
1631 copy->min = 0; 1635 copy->min = 0;
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1670 } 1674 }
1671 atom->min = 0; 1675 atom->min = 0;
1672 atom->max = 0; 1676 atom->max = 0;
1673 atom->quant = XML_REGEXP_QUANT_ONCE; 1677 atom->quant = XML_REGEXP_QUANT_ONCE;
1674 ctxt->state = newstate; 1678 ctxt->state = newstate;
1675 } 1679 }
1676 default: 1680 default:
1677 break; 1681 break;
1678 } 1682 }
1679 return(0); 1683 return(0);
1680 } 1684 }
1681 if ((atom->min == 0) && (atom->max == 0) && 1685 if ((atom->min == 0) && (atom->max == 0) &&
1682 (atom->quant == XML_REGEXP_QUANT_RANGE)) { 1686 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1683 /* 1687 /*
1684 * we can discard the atom and generate an epsilon transition instead 1688 * we can discard the atom and generate an epsilon transition instead
1685 */ 1689 */
1686 if (to == NULL) { 1690 if (to == NULL) {
1687 to = xmlRegNewState(ctxt); 1691 to = xmlRegNewState(ctxt);
1688 if (to != NULL) 1692 if (to != NULL)
1689 xmlRegStatePush(ctxt, to); 1693 xmlRegStatePush(ctxt, to);
1690 else { 1694 else {
1691 return(-1); 1695 return(-1);
1692 } 1696 }
1693 } 1697 }
1694 xmlFAGenerateEpsilonTransition(ctxt, from, to); 1698 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1695 ctxt->state = to; 1699 ctxt->state = to;
1696 xmlRegFreeAtom(atom); 1700 xmlRegFreeAtom(atom);
1697 return(0); 1701 return(0);
1698 } 1702 }
1699 if (to == NULL) { 1703 if (to == NULL) {
1700 to = xmlRegNewState(ctxt); 1704 to = xmlRegNewState(ctxt);
1701 if (to != NULL) 1705 if (to != NULL)
1702 xmlRegStatePush(ctxt, to); 1706 xmlRegStatePush(ctxt, to);
1703 else { 1707 else {
1704 return(-1); 1708 return(-1);
1705 } 1709 }
1706 } 1710 }
1707 end = to; 1711 end = to;
1708 if ((atom->quant == XML_REGEXP_QUANT_MULT) || 1712 if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1709 (atom->quant == XML_REGEXP_QUANT_PLUS)) { 1713 (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1710 /* 1714 /*
1711 * Do not pollute the target state by adding transitions from 1715 * Do not pollute the target state by adding transitions from
1712 * it as it is likely to be the shared target of multiple branches. 1716 * it as it is likely to be the shared target of multiple branches.
1713 * So isolate with an epsilon transition. 1717 * So isolate with an epsilon transition.
1714 */ 1718 */
1715 xmlRegStatePtr tmp; 1719 xmlRegStatePtr tmp;
1716 » 1720
1717 tmp = xmlRegNewState(ctxt); 1721 tmp = xmlRegNewState(ctxt);
1718 if (tmp != NULL) 1722 if (tmp != NULL)
1719 xmlRegStatePush(ctxt, tmp); 1723 xmlRegStatePush(ctxt, tmp);
1720 else { 1724 else {
1721 return(-1); 1725 return(-1);
1722 } 1726 }
1723 xmlFAGenerateEpsilonTransition(ctxt, tmp, to); 1727 xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1724 to = tmp; 1728 to = tmp;
1725 } 1729 }
1726 if (xmlRegAtomPush(ctxt, atom) < 0) { 1730 if (xmlRegAtomPush(ctxt, atom) < 0) {
1727 return(-1); 1731 return(-1);
1728 } 1732 }
1729 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); 1733 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1730 ctxt->state = end; 1734 ctxt->state = end;
1731 switch (atom->quant) { 1735 switch (atom->quant) {
1732 case XML_REGEXP_QUANT_OPT: 1736 case XML_REGEXP_QUANT_OPT:
1733 atom->quant = XML_REGEXP_QUANT_ONCE; 1737 atom->quant = XML_REGEXP_QUANT_ONCE;
1734 xmlFAGenerateEpsilonTransition(ctxt, from, to); 1738 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1735 break; 1739 break;
1736 case XML_REGEXP_QUANT_MULT: 1740 case XML_REGEXP_QUANT_MULT:
1737 atom->quant = XML_REGEXP_QUANT_ONCE; 1741 atom->quant = XML_REGEXP_QUANT_ONCE;
1738 xmlFAGenerateEpsilonTransition(ctxt, from, to); 1742 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1739 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); 1743 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1740 break; 1744 break;
1741 case XML_REGEXP_QUANT_PLUS: 1745 case XML_REGEXP_QUANT_PLUS:
1742 atom->quant = XML_REGEXP_QUANT_ONCE; 1746 atom->quant = XML_REGEXP_QUANT_ONCE;
1743 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); 1747 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1744 break; 1748 break;
1745 » case XML_REGEXP_QUANT_RANGE: 1749 » case XML_REGEXP_QUANT_RANGE:
1746 #if DV_test 1750 #if DV_test
1747 if (atom->min == 0) { 1751 if (atom->min == 0) {
1748 xmlFAGenerateEpsilonTransition(ctxt, from, to); 1752 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1749 } 1753 }
1750 #endif 1754 #endif
1751 break; 1755 break;
1752 default: 1756 default:
1753 break; 1757 break;
1754 } 1758 }
1755 return(0); 1759 return(0);
1756 } 1760 }
1757 1761
1758 /** 1762 /**
1759 * xmlFAReduceEpsilonTransitions: 1763 * xmlFAReduceEpsilonTransitions:
1760 * @ctxt: a regexp parser context 1764 * @ctxt: a regexp parser context
1761 * @fromnr: the from state 1765 * @fromnr: the from state
1762 * @tonr: the to state 1766 * @tonr: the to state
1763 * @counter: should that transition be associated to a counted 1767 * @counter: should that transition be associated to a counted
1764 * 1768 *
1765 */ 1769 */
1766 static void 1770 static void
1767 xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, 1771 xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1768 int tonr, int counter) { 1772 int tonr, int counter) {
1769 int transnr; 1773 int transnr;
1770 xmlRegStatePtr from; 1774 xmlRegStatePtr from;
1771 xmlRegStatePtr to; 1775 xmlRegStatePtr to;
1772 1776
(...skipping 23 matching lines...) Expand all
1796 if (to->trans[transnr].atom == NULL) { 1800 if (to->trans[transnr].atom == NULL) {
1797 /* 1801 /*
1798 * Don't remove counted transitions 1802 * Don't remove counted transitions
1799 * Don't loop either 1803 * Don't loop either
1800 */ 1804 */
1801 if (to->trans[transnr].to != fromnr) { 1805 if (to->trans[transnr].to != fromnr) {
1802 if (to->trans[transnr].count >= 0) { 1806 if (to->trans[transnr].count >= 0) {
1803 int newto = to->trans[transnr].to; 1807 int newto = to->trans[transnr].to;
1804 1808
1805 xmlRegStateAddTrans(ctxt, from, NULL, 1809 xmlRegStateAddTrans(ctxt, from, NULL,
1806 » » » » » ctxt->states[newto], 1810 » » » » » ctxt->states[newto],
1807 -1, to->trans[transnr].count); 1811 -1, to->trans[transnr].count);
1808 } else { 1812 } else {
1809 #ifdef DEBUG_REGEXP_GRAPH 1813 #ifdef DEBUG_REGEXP_GRAPH
1810 printf("Found epsilon trans %d from %d to %d\n", 1814 printf("Found epsilon trans %d from %d to %d\n",
1811 transnr, tonr, to->trans[transnr].to); 1815 transnr, tonr, to->trans[transnr].to);
1812 #endif 1816 #endif
1813 if (to->trans[transnr].counter >= 0) { 1817 if (to->trans[transnr].counter >= 0) {
1814 xmlFAReduceEpsilonTransitions(ctxt, fromnr, 1818 xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1815 to->trans[transnr].to, 1819 to->trans[transnr].to,
1816 to->trans[transnr].counter); 1820 to->trans[transnr].counter);
1817 } else { 1821 } else {
1818 xmlFAReduceEpsilonTransitions(ctxt, fromnr, 1822 xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1819 to->trans[transnr].to, 1823 to->trans[transnr].to,
1820 counter); 1824 counter);
1821 } 1825 }
1822 } 1826 }
1823 } 1827 }
1824 } else { 1828 } else {
1825 int newto = to->trans[transnr].to; 1829 int newto = to->trans[transnr].to;
1826 1830
1827 if (to->trans[transnr].counter >= 0) { 1831 if (to->trans[transnr].counter >= 0) {
1828 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 1832 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1829 » » » » ctxt->states[newto], 1833 » » » » ctxt->states[newto],
1830 to->trans[transnr].counter, -1); 1834 to->trans[transnr].counter, -1);
1831 } else { 1835 } else {
1832 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 1836 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1833 ctxt->states[newto], counter, -1); 1837 ctxt->states[newto], counter, -1);
1834 } 1838 }
1835 } 1839 }
1836 } 1840 }
1837 to->mark = XML_REGEXP_MARK_NORMAL; 1841 to->mark = XML_REGEXP_MARK_NORMAL;
1838 } 1842 }
1839 1843
1840 /** 1844 /**
1841 * xmlFAEliminateSimpleEpsilonTransitions: 1845 * xmlFAEliminateSimpleEpsilonTransitions:
1842 * @ctxt: a regexp parser context 1846 * @ctxt: a regexp parser context
1843 * 1847 *
1844 * Eliminating general epsilon transitions can get costly in the general 1848 * Eliminating general epsilon transitions can get costly in the general
1845 * algorithm due to the large amount of generated new transitions and 1849 * algorithm due to the large amount of generated new transitions and
1846 * associated comparisons. However for simple epsilon transition used just 1850 * associated comparisons. However for simple epsilon transition used just
1847 * to separate building blocks when generating the automata this can be 1851 * to separate building blocks when generating the automata this can be
1848 * reduced to state elimination: 1852 * reduced to state elimination:
1849 * - if there exists an epsilon from X to Y 1853 * - if there exists an epsilon from X to Y
1850 * - if there is no other transition from X 1854 * - if there is no other transition from X
1851 * then X and Y are semantically equivalent and X can be eliminated 1855 * then X and Y are semantically equivalent and X can be eliminated
1852 * If X is the start state then make Y the start state, else replace the 1856 * If X is the start state then make Y the start state, else replace the
1853 * target of all transitions to X by transitions to Y. 1857 * target of all transitions to X by transitions to Y.
1854 */ 1858 */
(...skipping 15 matching lines...) Expand all
1870 (state->trans[0].to >= 0) && 1874 (state->trans[0].to >= 0) &&
1871 (state->trans[0].to != statenr) && 1875 (state->trans[0].to != statenr) &&
1872 (state->trans[0].counter < 0) && 1876 (state->trans[0].counter < 0) &&
1873 (state->trans[0].count < 0)) { 1877 (state->trans[0].count < 0)) {
1874 newto = state->trans[0].to; 1878 newto = state->trans[0].to;
1875 1879
1876 if (state->type == XML_REGEXP_START_STATE) { 1880 if (state->type == XML_REGEXP_START_STATE) {
1877 #ifdef DEBUG_REGEXP_GRAPH 1881 #ifdef DEBUG_REGEXP_GRAPH
1878 printf("Found simple epsilon trans from start %d to %d\n", 1882 printf("Found simple epsilon trans from start %d to %d\n",
1879 statenr, newto); 1883 statenr, newto);
1880 #endif 1884 #endif
1881 } else { 1885 } else {
1882 #ifdef DEBUG_REGEXP_GRAPH 1886 #ifdef DEBUG_REGEXP_GRAPH
1883 printf("Found simple epsilon trans from %d to %d\n", 1887 printf("Found simple epsilon trans from %d to %d\n",
1884 statenr, newto); 1888 statenr, newto);
1885 #endif 1889 #endif
1886 for (i = 0;i < state->nbTransTo;i++) { 1890 for (i = 0;i < state->nbTransTo;i++) {
1887 tmp = ctxt->states[state->transTo[i]]; 1891 tmp = ctxt->states[state->transTo[i]];
1888 for (j = 0;j < tmp->nbTrans;j++) { 1892 for (j = 0;j < tmp->nbTrans;j++) {
1889 if (tmp->trans[j].to == statenr) { 1893 if (tmp->trans[j].to == statenr) {
1890 #ifdef DEBUG_REGEXP_GRAPH 1894 #ifdef DEBUG_REGEXP_GRAPH
1891 printf("Changed transition %d on %d to go to %d\n", 1895 printf("Changed transition %d on %d to go to %d\n",
1892 j, tmp->no, newto); 1896 j, tmp->no, newto);
1893 #endif 1897 #endif
1894 tmp->trans[j].to = -1; 1898 tmp->trans[j].to = -1;
1895 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom, 1899 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
1896 » » » » » » ctxt->states[newto], 1900 » » » » » » ctxt->states[newto],
1897 tmp->trans[j].counter, 1901 tmp->trans[j].counter,
1898 tmp->trans[j].count); 1902 tmp->trans[j].count);
1899 } 1903 }
1900 } 1904 }
1901 } 1905 }
1902 if (state->type == XML_REGEXP_FINAL_STATE) 1906 if (state->type == XML_REGEXP_FINAL_STATE)
1903 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; 1907 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1904 /* eliminate the transition completely */ 1908 /* eliminate the transition completely */
1905 state->nbTrans = 0; 1909 state->nbTrans = 0;
1906 1910
1907 state->type = XML_REGEXP_UNREACH_STATE; 1911 state->type = XML_REGEXP_UNREACH_STATE;
1908 1912
1909 } 1913 }
1910 1914
1911 } 1915 }
1912 } 1916 }
1913 } 1917 }
1914 /** 1918 /**
1915 * xmlFAEliminateEpsilonTransitions: 1919 * xmlFAEliminateEpsilonTransitions:
1916 * @ctxt: a regexp parser context 1920 * @ctxt: a regexp parser context
1917 * 1921 *
1918 */ 1922 */
1919 static void 1923 static void
1920 xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { 1924 xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
2102 ret = 0; 2106 ret = 0;
2103 else 2107 else
2104 ret = 1; 2108 ret = 1;
2105 } else if (range1->type == XML_REGEXP_CHARVAL) { 2109 } else if (range1->type == XML_REGEXP_CHARVAL) {
2106 int codepoint; 2110 int codepoint;
2107 int neg = 0; 2111 int neg = 0;
2108 2112
2109 /* 2113 /*
2110 * just check all codepoints in the range for acceptance, 2114 * just check all codepoints in the range for acceptance,
2111 * this is usually way cheaper since done only once at 2115 * this is usually way cheaper since done only once at
2112 » * compilation than testing over and over at runtime or 2116 » * compilation than testing over and over at runtime or
2113 * pushing too many states when evaluating. 2117 * pushing too many states when evaluating.
2114 */ 2118 */
2115 if (((range1->neg == 0) && (range2->neg != 0)) || 2119 if (((range1->neg == 0) && (range2->neg != 0)) ||
2116 ((range1->neg != 0) && (range2->neg == 0))) 2120 ((range1->neg != 0) && (range2->neg == 0)))
2117 neg = 1; 2121 neg = 1;
2118 2122
2119 for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) { 2123 for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
2120 ret = xmlRegCheckCharacterRange(range2->type, codepoint, 2124 ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2121 0, range2->start, range2->end, 2125 0, range2->start, range2->end,
2122 range2->blockName); 2126 range2->blockName);
(...skipping 456 matching lines...) Expand 10 before | Expand all | Expand 10 after
2579 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 2583 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2580 int to, xmlRegAtomPtr atom) { 2584 int to, xmlRegAtomPtr atom) {
2581 int ret = 1; 2585 int ret = 1;
2582 int res; 2586 int res;
2583 int transnr, nbTrans; 2587 int transnr, nbTrans;
2584 xmlRegTransPtr t1; 2588 xmlRegTransPtr t1;
2585 int deep = 1; 2589 int deep = 1;
2586 2590
2587 if (state == NULL) 2591 if (state == NULL)
2588 return(ret); 2592 return(ret);
2593 if (state->markd == XML_REGEXP_MARK_VISITED)
2594 return(ret);
2589 2595
2590 if (ctxt->flags & AM_AUTOMATA_RNG) 2596 if (ctxt->flags & AM_AUTOMATA_RNG)
2591 deep = 0; 2597 deep = 0;
2592 2598
2593 /* 2599 /*
2594 * don't recurse on transitions potentially added in the course of 2600 * don't recurse on transitions potentially added in the course of
2595 * the elimination. 2601 * the elimination.
2596 */ 2602 */
2597 nbTrans = state->nbTrans; 2603 nbTrans = state->nbTrans;
2598 for (transnr = 0;transnr < nbTrans;transnr++) { 2604 for (transnr = 0;transnr < nbTrans;transnr++) {
2599 t1 = &(state->trans[transnr]); 2605 t1 = &(state->trans[transnr]);
2600 /* 2606 /*
2601 * check transitions conflicting with the one looked at 2607 * check transitions conflicting with the one looked at
2602 */ 2608 */
2603 if (t1->atom == NULL) { 2609 if (t1->atom == NULL) {
2604 if (t1->to < 0) 2610 if (t1->to < 0)
2605 continue; 2611 continue;
2612 state->markd = XML_REGEXP_MARK_VISITED;
2606 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 2613 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2607 to, atom); 2614 to, atom);
2615 state->markd = 0;
2608 if (res == 0) { 2616 if (res == 0) {
2609 ret = 0; 2617 ret = 0;
2610 /* t1->nd = 1; */ 2618 /* t1->nd = 1; */
2611 } 2619 }
2612 continue; 2620 continue;
2613 } 2621 }
2614 if (t1->to != to) 2622 if (t1->to != to)
2615 continue; 2623 continue;
2616 if (xmlFACompareAtoms(t1->atom, atom, deep)) { 2624 if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2617 ret = 0; 2625 ret = 0;
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
2766 transition get marked down 2774 transition get marked down
2767 if (ret == 0) 2775 if (ret == 0)
2768 break; */ 2776 break; */
2769 } 2777 }
2770 2778
2771 ctxt->determinist = ret; 2779 ctxt->determinist = ret;
2772 return(ret); 2780 return(ret);
2773 } 2781 }
2774 2782
2775 /************************************************************************ 2783 /************************************************************************
2776 * » » » » » » » » » * 2784 *» » » » » » » » » *
2777 * Routines to check input against transition atoms * 2785 * Routines to check input against transition atoms *
2778 * » » » » » » » » » * 2786 *» » » » » » » » » *
2779 ************************************************************************/ 2787 ************************************************************************/
2780 2788
2781 static int 2789 static int
2782 xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, 2790 xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2783 int start, int end, const xmlChar *blockName) { 2791 int start, int end, const xmlChar *blockName) {
2784 int ret = 0; 2792 int ret = 0;
2785 2793
2786 switch (type) { 2794 switch (type) {
2787 case XML_REGEXP_STRING: 2795 case XML_REGEXP_STRING:
2788 case XML_REGEXP_SUBREG: 2796 case XML_REGEXP_SUBREG:
2789 case XML_REGEXP_RANGES: 2797 case XML_REGEXP_RANGES:
2790 case XML_REGEXP_EPSILON: 2798 case XML_REGEXP_EPSILON:
2791 return(-1); 2799 return(-1);
2792 case XML_REGEXP_ANYCHAR: 2800 case XML_REGEXP_ANYCHAR:
2793 ret = ((codepoint != '\n') && (codepoint != '\r')); 2801 ret = ((codepoint != '\n') && (codepoint != '\r'));
2794 break; 2802 break;
2795 case XML_REGEXP_CHARVAL: 2803 case XML_REGEXP_CHARVAL:
2796 ret = ((codepoint >= start) && (codepoint <= end)); 2804 ret = ((codepoint >= start) && (codepoint <= end));
2797 break; 2805 break;
2798 case XML_REGEXP_NOTSPACE: 2806 case XML_REGEXP_NOTSPACE:
2799 neg = !neg; 2807 neg = !neg;
2800 case XML_REGEXP_ANYSPACE: 2808 case XML_REGEXP_ANYSPACE:
2801 ret = ((codepoint == '\n') || (codepoint == '\r') || 2809 ret = ((codepoint == '\n') || (codepoint == '\r') ||
2802 (codepoint == '\t') || (codepoint == ' ')); 2810 (codepoint == '\t') || (codepoint == ' '));
2803 break; 2811 break;
2804 case XML_REGEXP_NOTINITNAME: 2812 case XML_REGEXP_NOTINITNAME:
2805 neg = !neg; 2813 neg = !neg;
2806 case XML_REGEXP_INITNAME: 2814 case XML_REGEXP_INITNAME:
2807 » ret = (IS_LETTER(codepoint) || 2815 » ret = (IS_LETTER(codepoint) ||
2808 (codepoint == '_') || (codepoint == ':')); 2816 (codepoint == '_') || (codepoint == ':'));
2809 break; 2817 break;
2810 case XML_REGEXP_NOTNAMECHAR: 2818 case XML_REGEXP_NOTNAMECHAR:
2811 neg = !neg; 2819 neg = !neg;
2812 case XML_REGEXP_NAMECHAR: 2820 case XML_REGEXP_NAMECHAR:
2813 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) || 2821 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2814 (codepoint == '.') || (codepoint == '-') || 2822 (codepoint == '.') || (codepoint == '-') ||
2815 (codepoint == '_') || (codepoint == ':') || 2823 (codepoint == '_') || (codepoint == ':') ||
2816 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint)); 2824 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2817 break; 2825 break;
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
3045 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, 3053 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3046 (const xmlChar *)atom->valuep); 3054 (const xmlChar *)atom->valuep);
3047 if (atom->neg) 3055 if (atom->neg)
3048 ret = !ret; 3056 ret = !ret;
3049 break; 3057 break;
3050 } 3058 }
3051 return(ret); 3059 return(ret);
3052 } 3060 }
3053 3061
3054 /************************************************************************ 3062 /************************************************************************
3055 * » » » » » » » » » * 3063 *» » » » » » » » » *
3056 * Saving and restoring state of an execution context * 3064 * Saving and restoring state of an execution context *
3057 * » » » » » » » » » * 3065 *» » » » » » » » » *
3058 ************************************************************************/ 3066 ************************************************************************/
3059 3067
3060 #ifdef DEBUG_REGEXP_EXEC 3068 #ifdef DEBUG_REGEXP_EXEC
3061 static void 3069 static void
3062 xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { 3070 xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
3063 printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); 3071 printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
3064 if (exec->inputStack != NULL) { 3072 if (exec->inputStack != NULL) {
3065 int i; 3073 int i;
3066 printf(": "); 3074 printf(": ");
3067 for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) 3075 for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
3147 exec->nbRollbacks--; 3155 exec->nbRollbacks--;
3148 exec->state = exec->rollbacks[exec->nbRollbacks].state; 3156 exec->state = exec->rollbacks[exec->nbRollbacks].state;
3149 exec->index = exec->rollbacks[exec->nbRollbacks].index; 3157 exec->index = exec->rollbacks[exec->nbRollbacks].index;
3150 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; 3158 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3151 if (exec->comp->nbCounters > 0) { 3159 if (exec->comp->nbCounters > 0) {
3152 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 3160 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3153 fprintf(stderr, "exec save: allocation failed"); 3161 fprintf(stderr, "exec save: allocation failed");
3154 exec->status = -6; 3162 exec->status = -6;
3155 return; 3163 return;
3156 } 3164 }
3157 » memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, 3165 » if (exec->counts) {
3166 » memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3158 exec->comp->nbCounters * sizeof(int)); 3167 exec->comp->nbCounters * sizeof(int));
3168 }
3159 } 3169 }
3160 3170
3161 #ifdef DEBUG_REGEXP_EXEC 3171 #ifdef DEBUG_REGEXP_EXEC
3162 printf("restored "); 3172 printf("restored ");
3163 xmlFARegDebugExec(exec); 3173 xmlFARegDebugExec(exec);
3164 #endif 3174 #endif
3165 } 3175 }
3166 3176
3167 /************************************************************************ 3177 /************************************************************************
3168 * » » » » » » » » » * 3178 *» » » » » » » » » *
3169 * Verifier, running an input against a compiled regexp * 3179 * Verifier, running an input against a compiled regexp *
3170 * » » » » » » » » » * 3180 *» » » » » » » » » *
3171 ************************************************************************/ 3181 ************************************************************************/
3172 3182
3173 static int 3183 static int
3174 xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { 3184 xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
3175 xmlRegExecCtxt execval; 3185 xmlRegExecCtxt execval;
3176 xmlRegExecCtxtPtr exec = &execval; 3186 xmlRegExecCtxtPtr exec = &execval;
3177 int ret, codepoint = 0, len, deter; 3187 int ret, codepoint = 0, len, deter;
3178 3188
3179 exec->inputString = content; 3189 exec->inputString = content;
3180 exec->index = 0; 3190 exec->index = 0;
(...skipping 11 matching lines...) Expand all
3192 exec->inputStackMax = 0; 3202 exec->inputStackMax = 0;
3193 if (comp->nbCounters > 0) { 3203 if (comp->nbCounters > 0) {
3194 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); 3204 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
3195 if (exec->counts == NULL) { 3205 if (exec->counts == NULL) {
3196 xmlRegexpErrMemory(NULL, "running regexp"); 3206 xmlRegexpErrMemory(NULL, "running regexp");
3197 return(-1); 3207 return(-1);
3198 } 3208 }
3199 memset(exec->counts, 0, comp->nbCounters * sizeof(int)); 3209 memset(exec->counts, 0, comp->nbCounters * sizeof(int));
3200 } else 3210 } else
3201 exec->counts = NULL; 3211 exec->counts = NULL;
3202 while ((exec->status == 0) && 3212 while ((exec->status == 0) && (exec->state != NULL) &&
3203 ((exec->inputString[exec->index] != 0) || 3213 ((exec->inputString[exec->index] != 0) ||
3204 ((exec->state != NULL) && 3214 ((exec->state != NULL) &&
3205 (exec->state->type != XML_REGEXP_FINAL_STATE)))) { 3215 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3206 xmlRegTransPtr trans; 3216 xmlRegTransPtr trans;
3207 xmlRegAtomPtr atom; 3217 xmlRegAtomPtr atom;
3208 3218
3209 /* 3219 /*
3210 * If end of input on non-terminal state, rollback, however we may 3220 * If end of input on non-terminal state, rollback, however we may
3211 * still have epsilon like transition for counted transitions 3221 * still have epsilon like transition for counted transitions
3212 * on counters, in that case don't break too early. Additionally, 3222 * on counters, in that case don't break too early. Additionally,
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
3446 if (exec->rollbacks != NULL) { 3456 if (exec->rollbacks != NULL) {
3447 if (exec->counts != NULL) { 3457 if (exec->counts != NULL) {
3448 int i; 3458 int i;
3449 3459
3450 for (i = 0;i < exec->maxRollbacks;i++) 3460 for (i = 0;i < exec->maxRollbacks;i++)
3451 if (exec->rollbacks[i].counts != NULL) 3461 if (exec->rollbacks[i].counts != NULL)
3452 xmlFree(exec->rollbacks[i].counts); 3462 xmlFree(exec->rollbacks[i].counts);
3453 } 3463 }
3454 xmlFree(exec->rollbacks); 3464 xmlFree(exec->rollbacks);
3455 } 3465 }
3466 if (exec->state == NULL)
3467 return(-1);
3456 if (exec->counts != NULL) 3468 if (exec->counts != NULL)
3457 xmlFree(exec->counts); 3469 xmlFree(exec->counts);
3458 if (exec->status == 0) 3470 if (exec->status == 0)
3459 return(1); 3471 return(1);
3460 if (exec->status == -1) { 3472 if (exec->status == -1) {
3461 if (exec->nbPush > MAX_PUSH) 3473 if (exec->nbPush > MAX_PUSH)
3462 return(-1); 3474 return(-1);
3463 return(0); 3475 return(0);
3464 } 3476 }
3465 return(exec->status); 3477 return(exec->status);
3466 } 3478 }
3467 3479
3468 /************************************************************************ 3480 /************************************************************************
3469 * » » » » » » » » » * 3481 *» » » » » » » » » *
3470 * Progressive interface to the verifier one atom at a time * 3482 * Progressive interface to the verifier one atom at a time *
3471 * » » » » » » » » » * 3483 *» » » » » » » » » *
3472 ************************************************************************/ 3484 ************************************************************************/
3473 #ifdef DEBUG_ERR 3485 #ifdef DEBUG_ERR
3474 static void testerr(xmlRegExecCtxtPtr exec); 3486 static void testerr(xmlRegExecCtxtPtr exec);
3475 #endif 3487 #endif
3476 3488
3477 /** 3489 /**
3478 * xmlRegNewExecCtxt: 3490 * xmlRegNewExecCtxt:
3479 * @comp: a precompiled regular expression 3491 * @comp: a precompiled regular expression
3480 * @callback: a callback function used for handling progresses in the 3492 * @callback: a callback function used for handling progresses in the
3481 * automata matching phase 3493 * automata matching phase
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
3578 } 3590 }
3579 3591
3580 static void 3592 static void
3581 xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, 3593 xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3582 void *data) { 3594 void *data) {
3583 #ifdef DEBUG_PUSH 3595 #ifdef DEBUG_PUSH
3584 printf("saving value: %d:%s\n", exec->inputStackNr, value); 3596 printf("saving value: %d:%s\n", exec->inputStackNr, value);
3585 #endif 3597 #endif
3586 if (exec->inputStackMax == 0) { 3598 if (exec->inputStackMax == 0) {
3587 exec->inputStackMax = 4; 3599 exec->inputStackMax = 4;
3588 » exec->inputStack = (xmlRegInputTokenPtr) 3600 » exec->inputStack = (xmlRegInputTokenPtr)
3589 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); 3601 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3590 if (exec->inputStack == NULL) { 3602 if (exec->inputStack == NULL) {
3591 xmlRegexpErrMemory(NULL, "pushing input string"); 3603 xmlRegexpErrMemory(NULL, "pushing input string");
3592 exec->inputStackMax = 0; 3604 exec->inputStackMax = 0;
3593 return; 3605 return;
3594 } 3606 }
3595 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { 3607 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3596 xmlRegInputTokenPtr tmp; 3608 xmlRegInputTokenPtr tmp;
3597 3609
3598 exec->inputStackMax *= 2; 3610 exec->inputStackMax *= 2;
3599 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, 3611 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3600 exec->inputStackMax * sizeof(xmlRegInputToken)); 3612 exec->inputStackMax * sizeof(xmlRegInputToken));
3601 if (tmp == NULL) { 3613 if (tmp == NULL) {
3602 xmlRegexpErrMemory(NULL, "pushing input string"); 3614 xmlRegexpErrMemory(NULL, "pushing input string");
3603 exec->inputStackMax /= 2; 3615 exec->inputStackMax /= 2;
3604 return; 3616 return;
3605 } 3617 }
3606 exec->inputStack = tmp; 3618 exec->inputStack = tmp;
3607 } 3619 }
3608 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); 3620 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3609 exec->inputStack[exec->inputStackNr].data = data; 3621 exec->inputStack[exec->inputStackNr].data = data;
3610 exec->inputStackNr++; 3622 exec->inputStackNr++;
3611 exec->inputStack[exec->inputStackNr].value = NULL; 3623 exec->inputStack[exec->inputStackNr].value = NULL;
3612 exec->inputStack[exec->inputStackNr].data = NULL; 3624 exec->inputStack[exec->inputStackNr].data = NULL;
3613 } 3625 }
3614 3626
3615 /** 3627 /**
3616 * xmlRegStrEqualWildcard: 3628 * xmlRegStrEqualWildcard:
3617 * @expStr: the string to be evaluated 3629 * @expStr: the string to be evaluated
3618 * @valStr: the validation string 3630 * @valStr: the validation string
3619 * 3631 *
3620 * Checks if both strings are equal or have the same content. "*" 3632 * Checks if both strings are equal or have the same content. "*"
3621 * can be used as a wildcard in @valStr; "|" is used as a seperator of 3633 * can be used as a wildcard in @valStr; "|" is used as a seperator of
3622 * substrings in both @expStr and @valStr. 3634 * substrings in both @expStr and @valStr.
3623 * 3635 *
3624 * Returns 1 if the comparison is satisfied and the number of substrings 3636 * Returns 1 if the comparison is satisfied and the number of substrings
3625 * is equal, 0 otherwise. 3637 * is equal, 0 otherwise.
3626 */ 3638 */
3627 3639
3628 static int 3640 static int
3629 xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { 3641 xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3630 if (expStr == valStr) return(1); 3642 if (expStr == valStr) return(1);
3631 if (expStr == NULL) return(0); 3643 if (expStr == NULL) return(0);
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
3677 static int 3689 static int
3678 xmlRegCompactPushString(xmlRegExecCtxtPtr exec, 3690 xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3679 xmlRegexpPtr comp, 3691 xmlRegexpPtr comp,
3680 const xmlChar *value, 3692 const xmlChar *value,
3681 void *data) { 3693 void *data) {
3682 int state = exec->index; 3694 int state = exec->index;
3683 int i, target; 3695 int i, target;
3684 3696
3685 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) 3697 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3686 return(-1); 3698 return(-1);
3687 3699
3688 if (value == NULL) { 3700 if (value == NULL) {
3689 /* 3701 /*
3690 * are we at a final state ? 3702 * are we at a final state ?
3691 */ 3703 */
3692 if (comp->compact[state * (comp->nbstrings + 1)] == 3704 if (comp->compact[state * (comp->nbstrings + 1)] ==
3693 XML_REGEXP_FINAL_STATE) 3705 XML_REGEXP_FINAL_STATE)
3694 return(1); 3706 return(1);
3695 return(0); 3707 return(0);
3696 } 3708 }
3697 3709
3698 #ifdef DEBUG_PUSH 3710 #ifdef DEBUG_PUSH
3699 printf("value pushed: %s\n", value); 3711 printf("value pushed: %s\n", value);
3700 #endif 3712 #endif
3701 3713
3702 /* 3714 /*
3703 * Examine all outside transitions from current state 3715 * Examine all outside transitions from current state
3704 */ 3716 */
3705 for (i = 0;i < comp->nbstrings;i++) { 3717 for (i = 0;i < comp->nbstrings;i++) {
3706 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 3718 target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3707 if ((target > 0) && (target <= comp->nbstates)) { 3719 if ((target > 0) && (target <= comp->nbstates)) {
3708 » target--; /* to avoid 0 */ 3720 » target--; /* to avoid 0 */
3709 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) { 3721 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3710 » » exec->index = target;» » 3722 » » exec->index = target;
3711 if ((exec->callback != NULL) && (comp->transdata != NULL)) { 3723 if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3712 exec->callback(exec->data, value, 3724 exec->callback(exec->data, value,
3713 comp->transdata[state * comp->nbstrings + i], data); 3725 comp->transdata[state * comp->nbstrings + i], data);
3714 } 3726 }
3715 #ifdef DEBUG_PUSH 3727 #ifdef DEBUG_PUSH
3716 printf("entering state %d\n", target); 3728 printf("entering state %d\n", target);
3717 #endif 3729 #endif
3718 if (comp->compact[target * (comp->nbstrings + 1)] == 3730 if (comp->compact[target * (comp->nbstrings + 1)] ==
3719 XML_REGEXP_SINK_STATE) 3731 XML_REGEXP_SINK_STATE)
3720 goto error; 3732 goto error;
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
3834 */ 3846 */
3835 if ((value == NULL) && (final)) { 3847 if ((value == NULL) && (final)) {
3836 ret = 1; 3848 ret = 1;
3837 } else if (value != NULL) { 3849 } else if (value != NULL) {
3838 for (i = 0;i < exec->state->nbTrans;i++) { 3850 for (i = 0;i < exec->state->nbTrans;i++) {
3839 t = &exec->state->trans[i]; 3851 t = &exec->state->trans[i];
3840 if ((t->counter < 0) || (t == trans)) 3852 if ((t->counter < 0) || (t == trans))
3841 continue; 3853 continue;
3842 counter = &exec->comp->counters[t->counter]; 3854 counter = &exec->comp->counters[t->counter];
3843 count = exec->counts[t->counter]; 3855 count = exec->counts[t->counter];
3844 » » » if ((count < counter->max) && 3856 » » » if ((count < counter->max) &&
3845 (t->atom != NULL) && 3857 (t->atom != NULL) &&
3846 (xmlStrEqual(value, t->atom->valuep))) { 3858 (xmlStrEqual(value, t->atom->valuep))) {
3847 ret = 0; 3859 ret = 0;
3848 break; 3860 break;
3849 } 3861 }
3850 if ((count >= counter->min) && 3862 if ((count >= counter->min) &&
3851 (count < counter->max) && 3863 (count < counter->max) &&
3852 (t->atom != NULL) && 3864 (t->atom != NULL) &&
3853 (xmlStrEqual(value, t->atom->valuep))) { 3865 (xmlStrEqual(value, t->atom->valuep))) {
3854 ret = 1; 3866 ret = 1;
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after
4074 exec->errState = exec->state; 4086 exec->errState = exec->state;
4075 memcpy(exec->errCounts, exec->counts, 4087 memcpy(exec->errCounts, exec->counts,
4076 exec->comp->nbCounters * sizeof(int)); 4088 exec->comp->nbCounters * sizeof(int));
4077 } 4089 }
4078 4090
4079 /* 4091 /*
4080 * Failed to find a way out 4092 * Failed to find a way out
4081 */ 4093 */
4082 exec->determinist = 0; 4094 exec->determinist = 0;
4083 xmlFARegExecRollBack(exec); 4095 xmlFARegExecRollBack(exec);
4084 » if (exec->status == 0) { 4096 » if ((exec->inputStack != NULL ) && (exec->status == 0)) {
4085 value = exec->inputStack[exec->index].value; 4097 value = exec->inputStack[exec->index].value;
4086 data = exec->inputStack[exec->index].data; 4098 data = exec->inputStack[exec->index].data;
4087 #ifdef DEBUG_PUSH 4099 #ifdef DEBUG_PUSH
4088 printf("value loaded: %s\n", value); 4100 printf("value loaded: %s\n", value);
4089 #endif 4101 #endif
4090 } 4102 }
4091 } 4103 }
4092 continue; 4104 continue;
4093 progress: 4105 progress:
4094 progress = 1; 4106 progress = 1;
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
4192 * 4204 *
4193 * Returns: 0 in case of success or -1 in case of error. 4205 * Returns: 0 in case of success or -1 in case of error.
4194 */ 4206 */
4195 static int 4207 static int
4196 xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err, 4208 xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
4197 int *nbval, int *nbneg, 4209 int *nbval, int *nbneg,
4198 xmlChar **values, int *terminal) { 4210 xmlChar **values, int *terminal) {
4199 int maxval; 4211 int maxval;
4200 int nb = 0; 4212 int nb = 0;
4201 4213
4202 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 4214 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
4203 (values == NULL) || (*nbval <= 0)) 4215 (values == NULL) || (*nbval <= 0))
4204 return(-1); 4216 return(-1);
4205 4217
4206 maxval = *nbval; 4218 maxval = *nbval;
4207 *nbval = 0; 4219 *nbval = 0;
4208 *nbneg = 0; 4220 *nbneg = 0;
4209 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { 4221 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
4210 xmlRegexpPtr comp; 4222 xmlRegexpPtr comp;
4211 int target, i, state; 4223 int target, i, state;
4212 4224
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
4289 if (exec->comp != NULL) 4301 if (exec->comp != NULL)
4290 counter = &exec->comp->counters[trans->counter]; 4302 counter = &exec->comp->counters[trans->counter];
4291 if ((counter == NULL) || (count < counter->max)) { 4303 if ((counter == NULL) || (count < counter->max)) {
4292 if (atom->neg) 4304 if (atom->neg)
4293 values[nb++] = (xmlChar *) atom->valuep2; 4305 values[nb++] = (xmlChar *) atom->valuep2;
4294 else 4306 else
4295 values[nb++] = (xmlChar *) atom->valuep; 4307 values[nb++] = (xmlChar *) atom->valuep;
4296 (*nbval)++; 4308 (*nbval)++;
4297 } 4309 }
4298 } else { 4310 } else {
4299 if ((exec->comp->states[trans->to] != NULL) && 4311 if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NU LL) &&
4300 (exec->comp->states[trans->to]->type != 4312 (exec->comp->states[trans->to]->type !=
4301 XML_REGEXP_SINK_STATE)) { 4313 XML_REGEXP_SINK_STATE)) {
4302 if (atom->neg) 4314 if (atom->neg)
4303 values[nb++] = (xmlChar *) atom->valuep2; 4315 values[nb++] = (xmlChar *) atom->valuep2;
4304 else 4316 else
4305 values[nb++] = (xmlChar *) atom->valuep; 4317 values[nb++] = (xmlChar *) atom->valuep;
4306 (*nbval)++; 4318 (*nbval)++;
4307 } 4319 }
4308 » } 4320 » }
4309 } 4321 }
4310 for (transno = 0; 4322 for (transno = 0;
4311 (transno < state->nbTrans) && (nb < maxval); 4323 (transno < state->nbTrans) && (nb < maxval);
4312 transno++) { 4324 transno++) {
4313 trans = &state->trans[transno]; 4325 trans = &state->trans[transno];
4314 if (trans->to < 0) 4326 if (trans->to < 0)
4315 continue; 4327 continue;
4316 atom = trans->atom; 4328 atom = trans->atom;
4317 if ((atom == NULL) || (atom->valuep == NULL)) 4329 if ((atom == NULL) || (atom->valuep == NULL))
4318 continue; 4330 continue;
4319 if (trans->count == REGEXP_ALL_LAX_COUNTER) { 4331 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4320 continue; 4332 continue;
4321 } else if (trans->count == REGEXP_ALL_COUNTER) { 4333 } else if (trans->count == REGEXP_ALL_COUNTER) {
4322 continue; 4334 continue;
4323 } else if (trans->counter >= 0) { 4335 } else if (trans->counter >= 0) {
4324 continue; 4336 continue;
4325 } else { 4337 } else {
4326 if ((exec->comp->states[trans->to] != NULL) && 4338 if ((exec->comp->states[trans->to] != NULL) &&
4327 (exec->comp->states[trans->to]->type == 4339 (exec->comp->states[trans->to]->type ==
4328 XML_REGEXP_SINK_STATE)) { 4340 XML_REGEXP_SINK_STATE)) {
4329 if (atom->neg) 4341 if (atom->neg)
4330 values[nb++] = (xmlChar *) atom->valuep2; 4342 values[nb++] = (xmlChar *) atom->valuep2;
4331 else 4343 else
4332 values[nb++] = (xmlChar *) atom->valuep; 4344 values[nb++] = (xmlChar *) atom->valuep;
4333 (*nbneg)++; 4345 (*nbneg)++;
4334 } 4346 }
4335 » } 4347 » }
4336 } 4348 }
4337 } 4349 }
4338 return(0); 4350 return(0);
4339 } 4351 }
4340 4352
4341 /** 4353 /**
4342 * xmlRegExecNextValues: 4354 * xmlRegExecNextValues:
4343 * @exec: a regexp execution context 4355 * @exec: a regexp execution context
4344 * @nbval: pointer to the number of accepted values IN/OUT 4356 * @nbval: pointer to the number of accepted values IN/OUT
4345 * @nbneg: return number of negative transitions 4357 * @nbneg: return number of negative transitions
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
4556 */ 4568 */
4557 exec->determinist = 0; 4569 exec->determinist = 0;
4558 xmlFARegExecRollBack(exec); 4570 xmlFARegExecRollBack(exec);
4559 } 4571 }
4560 progress: 4572 progress:
4561 continue; 4573 continue;
4562 } 4574 }
4563 } 4575 }
4564 #endif 4576 #endif
4565 /************************************************************************ 4577 /************************************************************************
4566 * » » » » » » » » » * 4578 *» » » » » » » » » *
4567 * Parser for the Schemas Datatype Regular Expressions * 4579 * Parser for the Schemas Datatype Regular Expressions *
4568 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * 4580 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
4569 * » » » » » » » » » * 4581 *» » » » » » » » » *
4570 ************************************************************************/ 4582 ************************************************************************/
4571 4583
4572 /** 4584 /**
4573 * xmlFAIsChar: 4585 * xmlFAIsChar:
4574 * @ctxt: a regexp parser context 4586 * @ctxt: a regexp parser context
4575 * 4587 *
4576 * [10] Char ::= [^.\?*+()|#x5B#x5D] 4588 * [10] Char ::= [^.\?*+()|#x5B#x5D]
4577 */ 4589 */
4578 static int 4590 static int
4579 xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { 4591 xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4580 int cur; 4592 int cur;
4581 int len; 4593 int len;
4582 4594
4583 cur = CUR_SCHAR(ctxt->cur, len); 4595 cur = CUR_SCHAR(ctxt->cur, len);
4584 if ((cur == '.') || (cur == '\\') || (cur == '?') || 4596 if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4585 (cur == '*') || (cur == '+') || (cur == '(') || 4597 (cur == '*') || (cur == '+') || (cur == '(') ||
4586 (cur == ')') || (cur == '|') || (cur == 0x5B) || 4598 (cur == ')') || (cur == '|') || (cur == 0x5B) ||
4587 (cur == 0x5D) || (cur == 0)) 4599 (cur == 0x5D) || (cur == 0))
4588 return(-1); 4600 return(-1);
4589 return(cur); 4601 return(cur);
4590 } 4602 }
4591 4603
4592 /** 4604 /**
4593 * xmlFAParseCharProp: 4605 * xmlFAParseCharProp:
4594 * @ctxt: a regexp parser context 4606 * @ctxt: a regexp parser context
4595 * 4607 *
4596 * [27] charProp ::= IsCategory | IsBlock 4608 * [27] charProp ::= IsCategory | IsBlock
4597 * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | 4609 * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation |
4598 * Separators | Symbols | Others 4610 * Separators | Symbols | Others
4599 * [29] Letters ::= 'L' [ultmo]? 4611 * [29] Letters ::= 'L' [ultmo]?
4600 * [30] Marks ::= 'M' [nce]? 4612 * [30] Marks ::= 'M' [nce]?
4601 * [31] Numbers ::= 'N' [dlo]? 4613 * [31] Numbers ::= 'N' [dlo]?
4602 * [32] Punctuation ::= 'P' [cdseifo]? 4614 * [32] Punctuation ::= 'P' [cdseifo]?
4603 * [33] Separators ::= 'Z' [slp]? 4615 * [33] Separators ::= 'Z' [slp]?
4604 * [34] Symbols ::= 'S' [mcko]? 4616 * [34] Symbols ::= 'S' [mcko]?
4605 * [35] Others ::= 'C' [cfon]? 4617 * [35] Others ::= 'C' [cfon]?
4606 * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ 4618 * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+
4607 */ 4619 */
4608 static void 4620 static void
4609 xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { 4621 xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4610 int cur; 4622 int cur;
4611 xmlRegAtomType type = (xmlRegAtomType) 0; 4623 xmlRegAtomType type = (xmlRegAtomType) 0;
4612 xmlChar *blockName = NULL; 4624 xmlChar *blockName = NULL;
4613 4625
4614 cur = CUR; 4626 cur = CUR;
4615 if (cur == 'L') { 4627 if (cur == 'L') {
4616 NEXT; 4628 NEXT;
4617 cur = CUR; 4629 cur = CUR;
4618 if (cur == 'u') { 4630 if (cur == 'u') {
4619 NEXT; 4631 NEXT;
4620 type = XML_REGEXP_LETTER_UPPERCASE; 4632 type = XML_REGEXP_LETTER_UPPERCASE;
4621 } else if (cur == 'l') { 4633 } else if (cur == 'l') {
4622 NEXT; 4634 NEXT;
4623 type = XML_REGEXP_LETTER_LOWERCASE; 4635 type = XML_REGEXP_LETTER_LOWERCASE;
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
4775 const xmlChar *start; 4787 const xmlChar *start;
4776 NEXT; 4788 NEXT;
4777 cur = CUR; 4789 cur = CUR;
4778 if (cur != 's') { 4790 if (cur != 's') {
4779 ERROR("IsXXXX expected"); 4791 ERROR("IsXXXX expected");
4780 return; 4792 return;
4781 } 4793 }
4782 NEXT; 4794 NEXT;
4783 start = ctxt->cur; 4795 start = ctxt->cur;
4784 cur = CUR; 4796 cur = CUR;
4785 » if (((cur >= 'a') && (cur <= 'z')) || 4797 » if (((cur >= 'a') && (cur <= 'z')) ||
4786 » ((cur >= 'A') && (cur <= 'Z')) || 4798 » ((cur >= 'A') && (cur <= 'Z')) ||
4787 » ((cur >= '0') && (cur <= '9')) || 4799 » ((cur >= '0') && (cur <= '9')) ||
4788 (cur == 0x2D)) { 4800 (cur == 0x2D)) {
4789 NEXT; 4801 NEXT;
4790 cur = CUR; 4802 cur = CUR;
4791 » while (((cur >= 'a') && (cur <= 'z')) || 4803 » while (((cur >= 'a') && (cur <= 'z')) ||
4792 » » ((cur >= 'A') && (cur <= 'Z')) || 4804 » » ((cur >= 'A') && (cur <= 'Z')) ||
4793 » » ((cur >= '0') && (cur <= '9')) || 4805 » » ((cur >= '0') && (cur <= '9')) ||
4794 (cur == 0x2D)) { 4806 (cur == 0x2D)) {
4795 NEXT; 4807 NEXT;
4796 cur = CUR; 4808 cur = CUR;
4797 } 4809 }
4798 } 4810 }
4799 type = XML_REGEXP_BLOCK_NAME; 4811 type = XML_REGEXP_BLOCK_NAME;
4800 blockName = xmlStrndup(start, ctxt->cur - start); 4812 blockName = xmlStrndup(start, ctxt->cur - start);
4801 } else { 4813 } else {
4802 ERROR("Unknown char property"); 4814 ERROR("Unknown char property");
4803 return; 4815 return;
4804 } 4816 }
4805 if (ctxt->atom == NULL) { 4817 if (ctxt->atom == NULL) {
4806 ctxt->atom = xmlRegNewAtom(ctxt, type); 4818 ctxt->atom = xmlRegNewAtom(ctxt, type);
4807 if (ctxt->atom != NULL) 4819 if (ctxt->atom != NULL)
4808 ctxt->atom->valuep = blockName; 4820 ctxt->atom->valuep = blockName;
4809 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 4821 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4810 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4822 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4811 type, 0, 0, blockName); 4823 type, 0, 0, blockName);
4812 } 4824 }
4813 } 4825 }
4814 4826
4815 /** 4827 /**
4816 * xmlFAParseCharClassEsc: 4828 * xmlFAParseCharClassEsc:
4817 * @ctxt: a regexp parser context 4829 * @ctxt: a regexp parser context
4818 * 4830 *
4819 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 4831 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
4820 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] 4832 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
4821 * [25] catEsc ::= '\p{' charProp '}' 4833 * [25] catEsc ::= '\p{' charProp '}'
4822 * [26] complEsc ::= '\P{' charProp '}' 4834 * [26] complEsc ::= '\P{' charProp '}'
4823 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) 4835 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
4824 */ 4836 */
4825 static void 4837 static void
4826 xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { 4838 xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4827 int cur; 4839 int cur;
4828 4840
4829 if (CUR == '.') { 4841 if (CUR == '.') {
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
4906 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4918 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4907 XML_REGEXP_CHARVAL, cur, cur, NULL); 4919 XML_REGEXP_CHARVAL, cur, cur, NULL);
4908 } 4920 }
4909 NEXT; 4921 NEXT;
4910 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || 4922 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4911 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || 4923 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4912 (cur == 'w') || (cur == 'W')) { 4924 (cur == 'w') || (cur == 'W')) {
4913 xmlRegAtomType type = XML_REGEXP_ANYSPACE; 4925 xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4914 4926
4915 switch (cur) { 4927 switch (cur) {
4916 » case 's': 4928 » case 's':
4917 type = XML_REGEXP_ANYSPACE; 4929 type = XML_REGEXP_ANYSPACE;
4918 break; 4930 break;
4919 » case 'S': 4931 » case 'S':
4920 type = XML_REGEXP_NOTSPACE; 4932 type = XML_REGEXP_NOTSPACE;
4921 break; 4933 break;
4922 » case 'i': 4934 » case 'i':
4923 type = XML_REGEXP_INITNAME; 4935 type = XML_REGEXP_INITNAME;
4924 break; 4936 break;
4925 » case 'I': 4937 » case 'I':
4926 type = XML_REGEXP_NOTINITNAME; 4938 type = XML_REGEXP_NOTINITNAME;
4927 break; 4939 break;
4928 » case 'c': 4940 » case 'c':
4929 type = XML_REGEXP_NAMECHAR; 4941 type = XML_REGEXP_NAMECHAR;
4930 break; 4942 break;
4931 » case 'C': 4943 » case 'C':
4932 type = XML_REGEXP_NOTNAMECHAR; 4944 type = XML_REGEXP_NOTNAMECHAR;
4933 break; 4945 break;
4934 » case 'd': 4946 » case 'd':
4935 type = XML_REGEXP_DECIMAL; 4947 type = XML_REGEXP_DECIMAL;
4936 break; 4948 break;
4937 » case 'D': 4949 » case 'D':
4938 type = XML_REGEXP_NOTDECIMAL; 4950 type = XML_REGEXP_NOTDECIMAL;
4939 break; 4951 break;
4940 » case 'w': 4952 » case 'w':
4941 type = XML_REGEXP_REALCHAR; 4953 type = XML_REGEXP_REALCHAR;
4942 break; 4954 break;
4943 » case 'W': 4955 » case 'W':
4944 type = XML_REGEXP_NOTREALCHAR; 4956 type = XML_REGEXP_NOTREALCHAR;
4945 break; 4957 break;
4946 } 4958 }
4947 NEXT; 4959 NEXT;
4948 if (ctxt->atom == NULL) { 4960 if (ctxt->atom == NULL) {
4949 ctxt->atom = xmlRegNewAtom(ctxt, type); 4961 ctxt->atom = xmlRegNewAtom(ctxt, type);
4950 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 4962 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4951 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4963 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4952 type, 0, 0, NULL); 4964 type, 0, 0, NULL);
4953 } 4965 }
4954 } else { 4966 } else {
4955 ERROR("Wrong escape sequence, misuse of character '\\'"); 4967 ERROR("Wrong escape sequence, misuse of character '\\'");
4956 } 4968 }
4957 } 4969 }
4958 4970
4959 /** 4971 /**
4960 * xmlFAParseCharRange: 4972 * xmlFAParseCharRange:
4961 * @ctxt: a regexp parser context 4973 * @ctxt: a regexp parser context
4962 * 4974 *
4963 * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash 4975 * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash
4964 * [18] seRange ::= charOrEsc '-' charOrEsc 4976 * [18] seRange ::= charOrEsc '-' charOrEsc
4965 * [20] charOrEsc ::= XmlChar | SingleCharEsc 4977 * [20] charOrEsc ::= XmlChar | SingleCharEsc
4966 * [21] XmlChar ::= [^\#x2D#x5B#x5D] 4978 * [21] XmlChar ::= [^\#x2D#x5B#x5D]
4967 * [22] XmlCharIncDash ::= [^\#x5B#x5D] 4979 * [22] XmlCharIncDash ::= [^\#x5B#x5D]
4968 */ 4980 */
4969 static void 4981 static void
4970 xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { 4982 xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4971 int cur, len; 4983 int cur, len;
4972 int start = -1; 4984 int start = -1;
4973 int end = -1; 4985 int end = -1;
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
5068 } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && 5080 } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
5069 (CUR != 0) && (ctxt->error == 0)); 5081 (CUR != 0) && (ctxt->error == 0));
5070 } 5082 }
5071 5083
5072 /** 5084 /**
5073 * xmlFAParseCharGroup: 5085 * xmlFAParseCharGroup:
5074 * @ctxt: a regexp parser context 5086 * @ctxt: a regexp parser context
5075 * 5087 *
5076 * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub 5088 * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub
5077 * [15] negCharGroup ::= '^' posCharGroup 5089 * [15] negCharGroup ::= '^' posCharGroup
5078 * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr 5090 * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
5079 * [12] charClassExpr ::= '[' charGroup ']' 5091 * [12] charClassExpr ::= '[' charGroup ']'
5080 */ 5092 */
5081 static void 5093 static void
5082 xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { 5094 xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5083 int n = ctxt->neg; 5095 int n = ctxt->neg;
5084 while ((CUR != ']') && (ctxt->error == 0)) { 5096 while ((CUR != ']') && (ctxt->error == 0)) {
5085 if (CUR == '^') { 5097 if (CUR == '^') {
5086 int neg = ctxt->neg; 5098 int neg = ctxt->neg;
5087 5099
5088 NEXT; 5100 NEXT;
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
5316 * [2] branch ::= piece* 5328 * [2] branch ::= piece*
5317 */ 5329 */
5318 static int 5330 static int
5319 xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) { 5331 xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5320 xmlRegStatePtr previous; 5332 xmlRegStatePtr previous;
5321 int ret; 5333 int ret;
5322 5334
5323 previous = ctxt->state; 5335 previous = ctxt->state;
5324 ret = xmlFAParsePiece(ctxt); 5336 ret = xmlFAParsePiece(ctxt);
5325 if (ret != 0) { 5337 if (ret != 0) {
5326 » if (xmlFAGenerateTransitions(ctxt, previous, 5338 » if (xmlFAGenerateTransitions(ctxt, previous,
5327 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) 5339 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5328 return(-1); 5340 return(-1);
5329 previous = ctxt->state; 5341 previous = ctxt->state;
5330 ctxt->atom = NULL; 5342 ctxt->atom = NULL;
5331 } 5343 }
5332 while ((ret != 0) && (ctxt->error == 0)) { 5344 while ((ret != 0) && (ctxt->error == 0)) {
5333 ret = xmlFAParsePiece(ctxt); 5345 ret = xmlFAParsePiece(ctxt);
5334 if (ret != 0) { 5346 if (ret != 0) {
5335 » if (xmlFAGenerateTransitions(ctxt, previous, 5347 » if (xmlFAGenerateTransitions(ctxt, previous,
5336 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) 5348 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5337 return(-1); 5349 return(-1);
5338 previous = ctxt->state; 5350 previous = ctxt->state;
5339 ctxt->atom = NULL; 5351 ctxt->atom = NULL;
5340 } 5352 }
5341 } 5353 }
5342 return(0); 5354 return(0);
5343 } 5355 }
5344 5356
5345 /** 5357 /**
(...skipping 17 matching lines...) Expand all
5363 #endif 5375 #endif
5364 ctxt->state->type = XML_REGEXP_FINAL_STATE; 5376 ctxt->state->type = XML_REGEXP_FINAL_STATE;
5365 } 5377 }
5366 if (CUR != '|') { 5378 if (CUR != '|') {
5367 ctxt->end = ctxt->state; 5379 ctxt->end = ctxt->state;
5368 return; 5380 return;
5369 } 5381 }
5370 end = ctxt->state; 5382 end = ctxt->state;
5371 while ((CUR == '|') && (ctxt->error == 0)) { 5383 while ((CUR == '|') && (ctxt->error == 0)) {
5372 NEXT; 5384 NEXT;
5385 if (CUR == 0) {
5386 ERROR("expecting a branch after |")
5387 return;
5388 }
5373 ctxt->state = start; 5389 ctxt->state = start;
5374 ctxt->end = NULL; 5390 ctxt->end = NULL;
5375 xmlFAParseBranch(ctxt, end); 5391 xmlFAParseBranch(ctxt, end);
5376 } 5392 }
5377 if (!top) { 5393 if (!top) {
5378 ctxt->state = end; 5394 ctxt->state = end;
5379 ctxt->end = end; 5395 ctxt->end = end;
5380 } 5396 }
5381 } 5397 }
5382 5398
5383 /************************************************************************ 5399 /************************************************************************
5384 * » » » » » » » » » * 5400 *» » » » » » » » » *
5385 * » » » The basic API» » » » » * 5401 *» » » The basic API» » » » » *
5386 * » » » » » » » » » * 5402 *» » » » » » » » » *
5387 ************************************************************************/ 5403 ************************************************************************/
5388 5404
5389 /** 5405 /**
5390 * xmlRegexpPrint: 5406 * xmlRegexpPrint:
5391 * @output: the file for the output debug 5407 * @output: the file for the output debug
5392 * @regexp: the compiled regexp 5408 * @regexp: the compiled regexp
5393 * 5409 *
5394 * Print the content of the compiled regular expression 5410 * Print the content of the compiled regular expression
5395 */ 5411 */
5396 void 5412 void
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after
5563 for (i = 0; i < regexp->nbstrings;i++) 5579 for (i = 0; i < regexp->nbstrings;i++)
5564 xmlFree(regexp->stringMap[i]); 5580 xmlFree(regexp->stringMap[i]);
5565 xmlFree(regexp->stringMap); 5581 xmlFree(regexp->stringMap);
5566 } 5582 }
5567 5583
5568 xmlFree(regexp); 5584 xmlFree(regexp);
5569 } 5585 }
5570 5586
5571 #ifdef LIBXML_AUTOMATA_ENABLED 5587 #ifdef LIBXML_AUTOMATA_ENABLED
5572 /************************************************************************ 5588 /************************************************************************
5573 * » » » » » » » » » * 5589 *» » » » » » » » » *
5574 * » » » The Automata interface» » » » * 5590 *» » » The Automata interface» » » » *
5575 * » » » » » » » » » * 5591 *» » » » » » » » » *
5576 ************************************************************************/ 5592 ************************************************************************/
5577 5593
5578 /** 5594 /**
5579 * xmlNewAutomata: 5595 * xmlNewAutomata:
5580 * 5596 *
5581 * Create a new automata 5597 * Create a new automata
5582 * 5598 *
5583 * Returns the new object or NULL in case of failure 5599 * Returns the new object or NULL in case of failure
5584 */ 5600 */
5585 xmlAutomataPtr 5601 xmlAutomataPtr
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
5686 xmlAutomataStatePtr to, const xmlChar *token, 5702 xmlAutomataStatePtr to, const xmlChar *token,
5687 void *data) { 5703 void *data) {
5688 xmlRegAtomPtr atom; 5704 xmlRegAtomPtr atom;
5689 5705
5690 if ((am == NULL) || (from == NULL) || (token == NULL)) 5706 if ((am == NULL) || (from == NULL) || (token == NULL))
5691 return(NULL); 5707 return(NULL);
5692 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 5708 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5693 if (atom == NULL) 5709 if (atom == NULL)
5694 return(NULL); 5710 return(NULL);
5695 atom->data = data; 5711 atom->data = data;
5696 if (atom == NULL)
5697 return(NULL);
5698 atom->valuep = xmlStrdup(token); 5712 atom->valuep = xmlStrdup(token);
5699 5713
5700 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 5714 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5701 xmlRegFreeAtom(atom); 5715 xmlRegFreeAtom(atom);
5702 return(NULL); 5716 return(NULL);
5703 } 5717 }
5704 if (to == NULL) 5718 if (to == NULL)
5705 return(am->state); 5719 return(am->state);
5706 return(to); 5720 return(to);
5707 } 5721 }
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
5836 * @from: the starting point of the transition 5850 * @from: the starting point of the transition
5837 * @to: the target point of the transition or NULL 5851 * @to: the target point of the transition or NULL
5838 * @token: the input string associated to that transition 5852 * @token: the input string associated to that transition
5839 * @token2: the second input string associated to that transition 5853 * @token2: the second input string associated to that transition
5840 * @min: the minimum successive occurences of token 5854 * @min: the minimum successive occurences of token
5841 * @max: the maximum successive occurences of token 5855 * @max: the maximum successive occurences of token
5842 * @data: data associated to the transition 5856 * @data: data associated to the transition
5843 * 5857 *
5844 * If @to is NULL, this creates first a new target state in the automata 5858 * If @to is NULL, this creates first a new target state in the automata
5845 * and then adds a transition from the @from state to the target state 5859 * and then adds a transition from the @from state to the target state
5846 * activated by a succession of input of value @token and @token2 and 5860 * activated by a succession of input of value @token and @token2 and
5847 * whose number is between @min and @max 5861 * whose number is between @min and @max
5848 * 5862 *
5849 * Returns the target state or NULL in case of error 5863 * Returns the target state or NULL in case of error
5850 */ 5864 */
5851 xmlAutomataStatePtr 5865 xmlAutomataStatePtr
5852 xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 5866 xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5853 xmlAutomataStatePtr to, const xmlChar *token, 5867 xmlAutomataStatePtr to, const xmlChar *token,
5854 const xmlChar *token2, 5868 const xmlChar *token2,
5855 int min, int max, void *data) { 5869 int min, int max, void *data) {
5856 xmlRegAtomPtr atom; 5870 xmlRegAtomPtr atom;
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
5990 * @from: the starting point of the transition 6004 * @from: the starting point of the transition
5991 * @to: the target point of the transition or NULL 6005 * @to: the target point of the transition or NULL
5992 * @token: the input string associated to that transition 6006 * @token: the input string associated to that transition
5993 * @token2: the second input string associated to that transition 6007 * @token2: the second input string associated to that transition
5994 * @min: the minimum successive occurences of token 6008 * @min: the minimum successive occurences of token
5995 * @max: the maximum successive occurences of token 6009 * @max: the maximum successive occurences of token
5996 * @data: data associated to the transition 6010 * @data: data associated to the transition
5997 * 6011 *
5998 * If @to is NULL, this creates first a new target state in the automata 6012 * If @to is NULL, this creates first a new target state in the automata
5999 * and then adds a transition from the @from state to the target state 6013 * and then adds a transition from the @from state to the target state
6000 * activated by a succession of input of value @token and @token2 and whose 6014 * activated by a succession of input of value @token and @token2 and whose
6001 * number is between @min and @max, moreover that transition can only be 6015 * number is between @min and @max, moreover that transition can only be
6002 * crossed once. 6016 * crossed once.
6003 * 6017 *
6004 * Returns the target state or NULL in case of error 6018 * Returns the target state or NULL in case of error
6005 */ 6019 */
6006 xmlAutomataStatePtr 6020 xmlAutomataStatePtr
6007 xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 6021 xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6008 xmlAutomataStatePtr to, const xmlChar *token, 6022 xmlAutomataStatePtr to, const xmlChar *token,
6009 const xmlChar *token2, 6023 const xmlChar *token2,
6010 int min, int max, void *data) { 6024 int min, int max, void *data) {
6011 xmlRegAtomPtr atom; 6025 xmlRegAtomPtr atom;
(...skipping 21 matching lines...) Expand all
6033 if (str == NULL) { 6047 if (str == NULL) {
6034 xmlRegFreeAtom(atom); 6048 xmlRegFreeAtom(atom);
6035 return(NULL); 6049 return(NULL);
6036 } 6050 }
6037 memcpy(&str[0], token, lenp); 6051 memcpy(&str[0], token, lenp);
6038 str[lenp] = '|'; 6052 str[lenp] = '|';
6039 memcpy(&str[lenp + 1], token2, lenn); 6053 memcpy(&str[lenp + 1], token2, lenn);
6040 str[lenn + lenp + 1] = 0; 6054 str[lenn + lenp + 1] = 0;
6041 6055
6042 atom->valuep = str; 6056 atom->valuep = str;
6043 } 6057 }
6044 atom->data = data; 6058 atom->data = data;
6045 atom->quant = XML_REGEXP_QUANT_ONCEONLY; 6059 atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6046 atom->min = min; 6060 atom->min = min;
6047 atom->max = max; 6061 atom->max = max;
6048 /* 6062 /*
6049 * associate a counter to the transition. 6063 * associate a counter to the transition.
6050 */ 6064 */
6051 counter = xmlRegGetCounter(am); 6065 counter = xmlRegGetCounter(am);
6052 am->counters[counter].min = 1; 6066 am->counters[counter].min = 1;
6053 am->counters[counter].max = 1; 6067 am->counters[counter].max = 1;
6054 6068
6055 /* xmlFAGenerateTransitions(am, from, to, atom); */ 6069 /* xmlFAGenerateTransitions(am, from, to, atom); */
6056 if (to == NULL) { 6070 if (to == NULL) {
6057 to = xmlRegNewState(am); 6071 to = xmlRegNewState(am);
6058 xmlRegStatePush(am, to); 6072 xmlRegStatePush(am, to);
6059 } 6073 }
6060 xmlRegStateAddTrans(am, from, atom, to, counter, -1); 6074 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6061 xmlRegAtomPush(am, atom); 6075 xmlRegAtomPush(am, atom);
6062 am->state = to; 6076 am->state = to;
6063 return(to); 6077 return(to);
6064 } 6078 }
6065 6079
6066 6080
6067 6081
6068 /** 6082 /**
6069 * xmlAutomataNewOnceTrans: 6083 * xmlAutomataNewOnceTrans:
6070 * @am: an automata 6084 * @am: an automata
6071 * @from: the starting point of the transition 6085 * @from: the starting point of the transition
6072 * @to: the target point of the transition or NULL 6086 * @to: the target point of the transition or NULL
6073 * @token: the input string associated to that transition 6087 * @token: the input string associated to that transition
6074 * @min: the minimum successive occurences of token 6088 * @min: the minimum successive occurences of token
6075 * @max: the maximum successive occurences of token 6089 * @max: the maximum successive occurences of token
6076 * @data: data associated to the transition 6090 * @data: data associated to the transition
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
6125 /** 6139 /**
6126 * xmlAutomataNewState: 6140 * xmlAutomataNewState:
6127 * @am: an automata 6141 * @am: an automata
6128 * 6142 *
6129 * Create a new disconnected state in the automata 6143 * Create a new disconnected state in the automata
6130 * 6144 *
6131 * Returns the new state or NULL in case of error 6145 * Returns the new state or NULL in case of error
6132 */ 6146 */
6133 xmlAutomataStatePtr 6147 xmlAutomataStatePtr
6134 xmlAutomataNewState(xmlAutomataPtr am) { 6148 xmlAutomataNewState(xmlAutomataPtr am) {
6135 xmlAutomataStatePtr to; 6149 xmlAutomataStatePtr to;
6136 6150
6137 if (am == NULL) 6151 if (am == NULL)
6138 return(NULL); 6152 return(NULL);
6139 to = xmlRegNewState(am); 6153 to = xmlRegNewState(am);
6140 xmlRegStatePush(am, to); 6154 xmlRegStatePush(am, to);
6141 return(to); 6155 return(to);
6142 } 6156 }
6143 6157
6144 /** 6158 /**
6145 * xmlAutomataNewEpsilon: 6159 * xmlAutomataNewEpsilon:
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
6192 /** 6206 /**
6193 * xmlAutomataNewCounter: 6207 * xmlAutomataNewCounter:
6194 * @am: an automata 6208 * @am: an automata
6195 * @min: the minimal value on the counter 6209 * @min: the minimal value on the counter
6196 * @max: the maximal value on the counter 6210 * @max: the maximal value on the counter
6197 * 6211 *
6198 * Create a new counter 6212 * Create a new counter
6199 * 6213 *
6200 * Returns the counter number or -1 in case of error 6214 * Returns the counter number or -1 in case of error
6201 */ 6215 */
6202 int» » 6216 int
6203 xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { 6217 xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
6204 int ret; 6218 int ret;
6205 6219
6206 if (am == NULL) 6220 if (am == NULL)
6207 return(-1); 6221 return(-1);
6208 6222
6209 ret = xmlRegGetCounter(am); 6223 ret = xmlRegGetCounter(am);
6210 if (ret < 0) 6224 if (ret < 0)
6211 return(-1); 6225 return(-1);
6212 am->counters[ret].min = min; 6226 am->counters[ret].min = min;
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
6264 6278
6265 /** 6279 /**
6266 * xmlAutomataCompile: 6280 * xmlAutomataCompile:
6267 * @am: an automata 6281 * @am: an automata
6268 * 6282 *
6269 * Compile the automata into a Reg Exp ready for being executed. 6283 * Compile the automata into a Reg Exp ready for being executed.
6270 * The automata should be free after this point. 6284 * The automata should be free after this point.
6271 * 6285 *
6272 * Returns the compiled regexp or NULL in case of error 6286 * Returns the compiled regexp or NULL in case of error
6273 */ 6287 */
6274 xmlRegexpPtr 6288 xmlRegexpPtr
6275 xmlAutomataCompile(xmlAutomataPtr am) { 6289 xmlAutomataCompile(xmlAutomataPtr am) {
6276 xmlRegexpPtr ret; 6290 xmlRegexpPtr ret;
6277 6291
6278 if ((am == NULL) || (am->error != 0)) return(NULL); 6292 if ((am == NULL) || (am->error != 0)) return(NULL);
6279 xmlFAEliminateEpsilonTransitions(am); 6293 xmlFAEliminateEpsilonTransitions(am);
6280 /* xmlFAComputesDeterminism(am); */ 6294 /* xmlFAComputesDeterminism(am); */
6281 ret = xmlRegEpxFromParse(am); 6295 ret = xmlRegEpxFromParse(am);
6282 6296
6283 return(ret); 6297 return(ret);
6284 } 6298 }
6285 6299
6286 /** 6300 /**
6287 * xmlAutomataIsDeterminist: 6301 * xmlAutomataIsDeterminist:
6288 * @am: an automata 6302 * @am: an automata
6289 * 6303 *
6290 * Checks if an automata is determinist. 6304 * Checks if an automata is determinist.
6291 * 6305 *
6292 * Returns 1 if true, 0 if not, and -1 in case of error 6306 * Returns 1 if true, 0 if not, and -1 in case of error
6293 */ 6307 */
6294 int 6308 int
6295 xmlAutomataIsDeterminist(xmlAutomataPtr am) { 6309 xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6296 int ret; 6310 int ret;
6297 6311
6298 if (am == NULL) 6312 if (am == NULL)
6299 return(-1); 6313 return(-1);
6300 6314
6301 ret = xmlFAComputesDeterminism(am); 6315 ret = xmlFAComputesDeterminism(am);
6302 return(ret); 6316 return(ret);
6303 } 6317 }
6304 #endif /* LIBXML_AUTOMATA_ENABLED */ 6318 #endif /* LIBXML_AUTOMATA_ENABLED */
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
6337 * 6351 *
6338 * Returns the context or NULL in case of error 6352 * Returns the context or NULL in case of error
6339 */ 6353 */
6340 xmlExpCtxtPtr 6354 xmlExpCtxtPtr
6341 xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) { 6355 xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
6342 xmlExpCtxtPtr ret; 6356 xmlExpCtxtPtr ret;
6343 int size = 256; 6357 int size = 256;
6344 6358
6345 if (maxNodes <= 4096) 6359 if (maxNodes <= 4096)
6346 maxNodes = 4096; 6360 maxNodes = 4096;
6347 6361
6348 ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt)); 6362 ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
6349 if (ret == NULL) 6363 if (ret == NULL)
6350 return(NULL); 6364 return(NULL);
6351 memset(ret, 0, sizeof(xmlExpCtxt)); 6365 memset(ret, 0, sizeof(xmlExpCtxt));
6352 ret->size = size; 6366 ret->size = size;
6353 ret->nbElems = 0; 6367 ret->nbElems = 0;
6354 ret->maxNodes = maxNodes; 6368 ret->maxNodes = maxNodes;
6355 ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr)); 6369 ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
6356 if (ret->table == NULL) { 6370 if (ret->table == NULL) {
6357 xmlFree(ret); 6371 xmlFree(ret);
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
6391 /************************************************************************ 6405 /************************************************************************
6392 * * 6406 * *
6393 * Structure associated to an expression node * 6407 * Structure associated to an expression node *
6394 * * 6408 * *
6395 ************************************************************************/ 6409 ************************************************************************/
6396 #define MAX_NODES 10000 6410 #define MAX_NODES 10000
6397 6411
6398 /* #define DEBUG_DERIV */ 6412 /* #define DEBUG_DERIV */
6399 6413
6400 /* 6414 /*
6401 * TODO: 6415 * TODO:
6402 * - Wildcards 6416 * - Wildcards
6403 * - public API for creation 6417 * - public API for creation
6404 * 6418 *
6405 * Started 6419 * Started
6406 * - regression testing 6420 * - regression testing
6407 * 6421 *
6408 * Done 6422 * Done
6409 * - split into module and test tool 6423 * - split into module and test tool
6410 * - memleaks 6424 * - memleaks
6411 */ 6425 */
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
6459 * * 6473 * *
6460 ************************************************************************/ 6474 ************************************************************************/
6461 /* 6475 /*
6462 * xmlExpHashNameComputeKey: 6476 * xmlExpHashNameComputeKey:
6463 * Calculate the hash key for a token 6477 * Calculate the hash key for a token
6464 */ 6478 */
6465 static unsigned short 6479 static unsigned short
6466 xmlExpHashNameComputeKey(const xmlChar *name) { 6480 xmlExpHashNameComputeKey(const xmlChar *name) {
6467 unsigned short value = 0L; 6481 unsigned short value = 0L;
6468 char ch; 6482 char ch;
6469 6483
6470 if (name != NULL) { 6484 if (name != NULL) {
6471 value += 30 * (*name); 6485 value += 30 * (*name);
6472 while ((ch = *name++) != 0) { 6486 while ((ch = *name++) != 0) {
6473 » value = value ^ ((value << 5) + (value >> 3) + (unsigned short)ch); 6487 » value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
6474 } 6488 }
6475 } 6489 }
6476 return (value); 6490 return (value);
6477 } 6491 }
6478 6492
6479 /* 6493 /*
6480 * xmlExpHashComputeKey: 6494 * xmlExpHashComputeKey:
6481 * Calculate the hash key for a compound expression 6495 * Calculate the hash key for a compound expression
6482 */ 6496 */
6483 static unsigned short 6497 static unsigned short
6484 xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left, 6498 xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
6485 xmlExpNodePtr right) { 6499 xmlExpNodePtr right) {
6486 unsigned long value; 6500 unsigned long value;
6487 unsigned short ret; 6501 unsigned short ret;
6488 6502
6489 switch (type) { 6503 switch (type) {
6490 case XML_EXP_SEQ: 6504 case XML_EXP_SEQ:
6491 value = left->key; 6505 value = left->key;
6492 value += right->key; 6506 value += right->key;
6493 value *= 3; 6507 value *= 3;
6494 ret = (unsigned short) value; 6508 ret = (unsigned short) value;
6495 break; 6509 break;
6496 case XML_EXP_OR: 6510 case XML_EXP_OR:
6497 value = left->key; 6511 value = left->key;
6498 value += right->key; 6512 value += right->key;
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
6619 tmp = left->exp_right; 6633 tmp = left->exp_right;
6620 left->exp_right = left->exp_left; 6634 left->exp_right = left->exp_left;
6621 left->exp_left = tmp; 6635 left->exp_left = tmp;
6622 } 6636 }
6623 left->exp_right->ref++; 6637 left->exp_right->ref++;
6624 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right, 6638 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6625 NULL, 0, 0); 6639 NULL, 0, 0);
6626 left->exp_left->ref++; 6640 left->exp_left->ref++;
6627 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp, 6641 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6628 NULL, 0, 0); 6642 NULL, 0, 0);
6629 » 6643
6630 xmlExpFree(ctxt, left); 6644 xmlExpFree(ctxt, left);
6631 return(tmp); 6645 return(tmp);
6632 } 6646 }
6633 if (right->type == XML_EXP_OR) { 6647 if (right->type == XML_EXP_OR) {
6634 /* Ordering in the tree */ 6648 /* Ordering in the tree */
6635 /* C | (A | B) -> A | (B | C) */ 6649 /* C | (A | B) -> A | (B | C) */
6636 if (left->key > right->exp_right->key) { 6650 if (left->key > right->exp_right->key) {
6637 xmlExpNodePtr tmp; 6651 xmlExpNodePtr tmp;
6638 right->exp_right->ref++; 6652 right->exp_right->ref++;
6639 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right, 6653 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
6676 return(right); 6690 return(right);
6677 } 6691 }
6678 /* Empty reduction rules */ 6692 /* Empty reduction rules */
6679 if (right->type == XML_EXP_EMPTY) { 6693 if (right->type == XML_EXP_EMPTY) {
6680 return(left); 6694 return(left);
6681 } 6695 }
6682 if (left->type == XML_EXP_EMPTY) { 6696 if (left->type == XML_EXP_EMPTY) {
6683 return(right); 6697 return(right);
6684 } 6698 }
6685 kbase = xmlExpHashComputeKey(type, left, right); 6699 kbase = xmlExpHashComputeKey(type, left, right);
6686 } else 6700 } else
6687 return(NULL); 6701 return(NULL);
6688 6702
6689 key = kbase % ctxt->size; 6703 key = kbase % ctxt->size;
6690 if (ctxt->table[key] != NULL) { 6704 if (ctxt->table[key] != NULL) {
6691 for (insert = ctxt->table[key]; insert != NULL; 6705 for (insert = ctxt->table[key]; insert != NULL;
6692 insert = insert->next) { 6706 insert = insert->next) {
6693 if ((insert->key == kbase) && 6707 if ((insert->key == kbase) &&
6694 (insert->type == type)) { 6708 (insert->type == type)) {
6695 if (type == XML_EXP_ATOM) { 6709 if (type == XML_EXP_ATOM) {
6696 if (name == insert->exp_str) { 6710 if (name == insert->exp_str) {
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
6817 void 6831 void
6818 xmlExpRef(xmlExpNodePtr exp) { 6832 xmlExpRef(xmlExpNodePtr exp) {
6819 if (exp != NULL) 6833 if (exp != NULL)
6820 exp->ref++; 6834 exp->ref++;
6821 } 6835 }
6822 6836
6823 /** 6837 /**
6824 * xmlExpNewAtom: 6838 * xmlExpNewAtom:
6825 * @ctxt: the expression context 6839 * @ctxt: the expression context
6826 * @name: the atom name 6840 * @name: the atom name
6827 * @len: the atom name lenght in byte (or -1); 6841 * @len: the atom name length in byte (or -1);
6828 * 6842 *
6829 * Get the atom associated to this name from that context 6843 * Get the atom associated to this name from that context
6830 * 6844 *
6831 * Returns the node or NULL in case of error 6845 * Returns the node or NULL in case of error
6832 */ 6846 */
6833 xmlExpNodePtr 6847 xmlExpNodePtr
6834 xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) { 6848 xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
6835 if ((ctxt == NULL) || (name == NULL)) 6849 if ((ctxt == NULL) || (name == NULL))
6836 return(NULL); 6850 return(NULL);
6837 name = xmlDictLookup(ctxt->dict, name, len); 6851 name = xmlDictLookup(ctxt->dict, name, len);
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
6917 NULL, NULL, min, max)); 6931 NULL, NULL, min, max));
6918 } 6932 }
6919 6933
6920 /************************************************************************ 6934 /************************************************************************
6921 * * 6935 * *
6922 * Public API for operations on expressions * 6936 * Public API for operations on expressions *
6923 * * 6937 * *
6924 ************************************************************************/ 6938 ************************************************************************/
6925 6939
6926 static int 6940 static int
6927 xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 6941 xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6928 const xmlChar**list, int len, int nb) { 6942 const xmlChar**list, int len, int nb) {
6929 int tmp, tmp2; 6943 int tmp, tmp2;
6930 tail: 6944 tail:
6931 switch (exp->type) { 6945 switch (exp->type) {
6932 case XML_EXP_EMPTY: 6946 case XML_EXP_EMPTY:
6933 return(0); 6947 return(0);
6934 case XML_EXP_ATOM: 6948 case XML_EXP_ATOM:
6935 for (tmp = 0;tmp < nb;tmp++) 6949 for (tmp = 0;tmp < nb;tmp++)
6936 if (list[tmp] == exp->exp_str) 6950 if (list[tmp] == exp->exp_str)
6937 return(0); 6951 return(0);
(...skipping 16 matching lines...) Expand all
6954 return(tmp + tmp2); 6968 return(tmp + tmp2);
6955 } 6969 }
6956 return(-1); 6970 return(-1);
6957 } 6971 }
6958 6972
6959 /** 6973 /**
6960 * xmlExpGetLanguage: 6974 * xmlExpGetLanguage:
6961 * @ctxt: the expression context 6975 * @ctxt: the expression context
6962 * @exp: the expression 6976 * @exp: the expression
6963 * @langList: where to store the tokens 6977 * @langList: where to store the tokens
6964 * @len: the allocated lenght of @list 6978 * @len: the allocated length of @list
6965 * 6979 *
6966 * Find all the strings used in @exp and store them in @list 6980 * Find all the strings used in @exp and store them in @list
6967 * 6981 *
6968 * Returns the number of unique strings found, -1 in case of errors and 6982 * Returns the number of unique strings found, -1 in case of errors and
6969 * -2 if there is more than @len strings 6983 * -2 if there is more than @len strings
6970 */ 6984 */
6971 int 6985 int
6972 xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 6986 xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6973 const xmlChar**langList, int len) { 6987 const xmlChar**langList, int len) {
6974 if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0)) 6988 if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
6975 return(-1); 6989 return(-1);
6976 return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0)); 6990 return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
6977 } 6991 }
6978 6992
6979 static int 6993 static int
6980 xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 6994 xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6981 const xmlChar**list, int len, int nb) { 6995 const xmlChar**list, int len, int nb) {
6982 int tmp, tmp2; 6996 int tmp, tmp2;
6983 tail: 6997 tail:
6984 switch (exp->type) { 6998 switch (exp->type) {
6985 case XML_EXP_FORBID: 6999 case XML_EXP_FORBID:
6986 return(0); 7000 return(0);
6987 case XML_EXP_EMPTY: 7001 case XML_EXP_EMPTY:
6988 return(0); 7002 return(0);
6989 case XML_EXP_ATOM: 7003 case XML_EXP_ATOM:
6990 for (tmp = 0;tmp < nb;tmp++) 7004 for (tmp = 0;tmp < nb;tmp++)
(...skipping 29 matching lines...) Expand all
7020 return(tmp + tmp2); 7034 return(tmp + tmp2);
7021 } 7035 }
7022 return(-1); 7036 return(-1);
7023 } 7037 }
7024 7038
7025 /** 7039 /**
7026 * xmlExpGetStart: 7040 * xmlExpGetStart:
7027 * @ctxt: the expression context 7041 * @ctxt: the expression context
7028 * @exp: the expression 7042 * @exp: the expression
7029 * @tokList: where to store the tokens 7043 * @tokList: where to store the tokens
7030 * @len: the allocated lenght of @list 7044 * @len: the allocated length of @list
7031 * 7045 *
7032 * Find all the strings that appears at the start of the languages 7046 * Find all the strings that appears at the start of the languages
7033 * accepted by @exp and store them in @list. E.g. for (a, b) | c 7047 * accepted by @exp and store them in @list. E.g. for (a, b) | c
7034 * it will return the list [a, c] 7048 * it will return the list [a, c]
7035 * 7049 *
7036 * Returns the number of unique strings found, -1 in case of errors and 7050 * Returns the number of unique strings found, -1 in case of errors and
7037 * -2 if there is more than @len strings 7051 * -2 if there is more than @len strings
7038 */ 7052 */
7039 int 7053 int
7040 xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 7054 xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7041 const xmlChar**tokList, int len) { 7055 const xmlChar**tokList, int len) {
7042 if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0)) 7056 if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
7043 return(-1); 7057 return(-1);
7044 return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0)); 7058 return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
7045 } 7059 }
7046 7060
7047 /** 7061 /**
7048 * xmlExpIsNillable: 7062 * xmlExpIsNillable:
7049 * @exp: the expression 7063 * @exp: the expression
7050 * 7064 *
(...skipping 676 matching lines...) Expand 10 before | Expand all | Expand 10 after
7727 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0); 7741 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7728 if (ret == NULL) { 7742 if (ret == NULL) {
7729 xmlFree((xmlChar **) tab); 7743 xmlFree((xmlChar **) tab);
7730 return(NULL); 7744 return(NULL);
7731 } 7745 }
7732 } 7746 }
7733 } 7747 }
7734 xmlFree((xmlChar **) tab); 7748 xmlFree((xmlChar **) tab);
7735 return(ret); 7749 return(ret);
7736 } 7750 }
7737 7751
7738 /** 7752 /**
7739 * xmlExpExpDerive: 7753 * xmlExpExpDerive:
7740 * @ctxt: the expressions context 7754 * @ctxt: the expressions context
7741 * @exp: the englobing expression 7755 * @exp: the englobing expression
7742 * @sub: the subexpression 7756 * @sub: the subexpression
7743 * 7757 *
7744 * Evaluates the expression resulting from @exp consuming a sub expression @sub 7758 * Evaluates the expression resulting from @exp consuming a sub expression @sub
7745 * Based on algebraic derivation and sometimes direct Brzozowski derivation 7759 * Based on algebraic derivation and sometimes direct Brzozowski derivation
7746 * it usually tatkes less than linear time and can handle expressions generating 7760 * it usually tatkes less than linear time and can handle expressions generating
7747 * infinite languages. 7761 * infinite languages.
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
7779 * @sub: the subexpression 7793 * @sub: the subexpression
7780 * 7794 *
7781 * Check whether @exp accepts all the languages accexpted by @sub 7795 * Check whether @exp accepts all the languages accexpted by @sub
7782 * the input being a subexpression. 7796 * the input being a subexpression.
7783 * 7797 *
7784 * Returns 1 if true 0 if false and -1 in case of failure. 7798 * Returns 1 if true 0 if false and -1 in case of failure.
7785 */ 7799 */
7786 int 7800 int
7787 xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) { 7801 xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7788 xmlExpNodePtr tmp; 7802 xmlExpNodePtr tmp;
7789 7803
7790 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL)) 7804 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7791 return(-1); 7805 return(-1);
7792 7806
7793 /* 7807 /*
7794 * TODO: speedup by checking the language of sub is a subset of the 7808 * TODO: speedup by checking the language of sub is a subset of the
7795 * language of exp 7809 * language of exp
7796 */ 7810 */
7797 /* 7811 /*
7798 * O(1) speedups 7812 * O(1) speedups
7799 */ 7813 */
(...skipping 23 matching lines...) Expand all
7823 if ((tmp != NULL) && (IS_NILLABLE(tmp))) { 7837 if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
7824 xmlExpFree(ctxt, tmp); 7838 xmlExpFree(ctxt, tmp);
7825 return(1); 7839 return(1);
7826 } 7840 }
7827 xmlExpFree(ctxt, tmp); 7841 xmlExpFree(ctxt, tmp);
7828 return(0); 7842 return(0);
7829 } 7843 }
7830 7844
7831 /************************************************************************ 7845 /************************************************************************
7832 * * 7846 * *
7833 *» » » Parsing expression » » » » * 7847 *» » » Parsing expression» » » » *
7834 * * 7848 * *
7835 ************************************************************************/ 7849 ************************************************************************/
7836 7850
7837 static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt); 7851 static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
7838 7852
7839 #undef CUR 7853 #undef CUR
7840 #define CUR (*ctxt->cur) 7854 #define CUR (*ctxt->cur)
7841 #undef NEXT 7855 #undef NEXT
7842 #define NEXT ctxt->cur++; 7856 #define NEXT ctxt->cur++;
7843 #undef IS_BLANK 7857 #undef IS_BLANK
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
7927 } else if (CUR == '+') { 7941 } else if (CUR == '+') {
7928 NEXT 7942 NEXT
7929 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, 7943 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7930 1, -1); 7944 1, -1);
7931 SKIP_BLANKS 7945 SKIP_BLANKS
7932 } else if (CUR == '*') { 7946 } else if (CUR == '*') {
7933 NEXT 7947 NEXT
7934 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, 7948 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7935 0, -1); 7949 0, -1);
7936 SKIP_BLANKS 7950 SKIP_BLANKS
7937 } 7951 }
7938 return(ret); 7952 return(ret);
7939 } 7953 }
7940 7954
7941 7955
7942 static xmlExpNodePtr 7956 static xmlExpNodePtr
7943 xmlExpParseSeq(xmlExpCtxtPtr ctxt) { 7957 xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
7944 xmlExpNodePtr ret, right; 7958 xmlExpNodePtr ret, right;
7945 7959
7946 ret = xmlExpParseOr(ctxt); 7960 ret = xmlExpParseOr(ctxt);
7947 SKIP_BLANKS 7961 SKIP_BLANKS
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after
8049 xmlExpDumpInt(buf, c, 0); 8063 xmlExpDumpInt(buf, c, 0);
8050 xmlBufferWriteChar(buf, " | "); 8064 xmlBufferWriteChar(buf, " | ");
8051 c = expr->exp_right; 8065 c = expr->exp_right;
8052 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) 8066 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8053 xmlExpDumpInt(buf, c, 1); 8067 xmlExpDumpInt(buf, c, 1);
8054 else 8068 else
8055 xmlExpDumpInt(buf, c, 0); 8069 xmlExpDumpInt(buf, c, 0);
8056 break; 8070 break;
8057 case XML_EXP_COUNT: { 8071 case XML_EXP_COUNT: {
8058 char rep[40]; 8072 char rep[40];
8059 » 8073
8060 c = expr->exp_left; 8074 c = expr->exp_left;
8061 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) 8075 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8062 xmlExpDumpInt(buf, c, 1); 8076 xmlExpDumpInt(buf, c, 1);
8063 else 8077 else
8064 xmlExpDumpInt(buf, c, 0); 8078 xmlExpDumpInt(buf, c, 0);
8065 if ((expr->exp_min == 0) && (expr->exp_max == 1)) { 8079 if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
8066 rep[0] = '?'; 8080 rep[0] = '?';
8067 rep[1] = 0; 8081 rep[1] = 0;
8068 } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) { 8082 } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
8069 rep[0] = '*'; 8083 rep[0] = '*';
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
8144 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) { 8158 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8145 if (ctxt == NULL) 8159 if (ctxt == NULL)
8146 return(-1); 8160 return(-1);
8147 return(ctxt->nb_cons); 8161 return(ctxt->nb_cons);
8148 } 8162 }
8149 8163
8150 #endif /* LIBXML_EXPR_ENABLED */ 8164 #endif /* LIBXML_EXPR_ENABLED */
8151 #define bottom_xmlregexp 8165 #define bottom_xmlregexp
8152 #include "elfgcchack.h" 8166 #include "elfgcchack.h"
8153 #endif /* LIBXML_REGEXP_ENABLED */ 8167 #endif /* LIBXML_REGEXP_ENABLED */
OLDNEW
« no previous file with comments | « third_party/libxml/src/xmlreader.c ('k') | third_party/libxml/src/xmlsave.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698