| OLD | NEW |
| 1 /* | 1 /* |
| 2 * regexp.c: generic and extensible Regular Expression engine | 2 * regexp.c: generic and extensible Regular Expression engine |
| 3 * | 3 * |
| 4 * Basically designed with the purpose of compiling regexps for | 4 * Basically designed with the purpose of compiling regexps for |
| 5 * the variety of validation/shemas mechanisms now available in | 5 * the variety of validation/shemas mechanisms now available in |
| 6 * XML related specifications these include: | 6 * XML related specifications these include: |
| 7 * - XML-1.0 DTD validation | 7 * - XML-1.0 DTD validation |
| 8 * - XML Schemas structure part 1 | 8 * - XML Schemas structure part 1 |
| 9 * - XML Schemas Datatypes part 2 especially Appendix F | 9 * - XML Schemas Datatypes part 2 especially Appendix F |
| 10 * - RELAX-NG/TREX i.e. the counter proposal | 10 * - RELAX-NG/TREX i.e. the counter proposal |
| 11 * | 11 * |
| 12 * See Copyright for the status of this software. | 12 * See Copyright for the status of this software. |
| 13 * | 13 * |
| 14 * Daniel Veillard <veillard@redhat.com> | 14 * Daniel Veillard <veillard@redhat.com> |
| (...skipping 22 matching lines...) Expand all Loading... |
| 37 #define INT_MAX 123456789 /* easy to flag and big enough for our needs */ | 37 #define INT_MAX 123456789 /* easy to flag and big enough for our needs */ |
| 38 #endif | 38 #endif |
| 39 | 39 |
| 40 /* #define DEBUG_REGEXP_GRAPH */ | 40 /* #define DEBUG_REGEXP_GRAPH */ |
| 41 /* #define DEBUG_REGEXP_EXEC */ | 41 /* #define DEBUG_REGEXP_EXEC */ |
| 42 /* #define DEBUG_PUSH */ | 42 /* #define DEBUG_PUSH */ |
| 43 /* #define DEBUG_COMPACTION */ | 43 /* #define DEBUG_COMPACTION */ |
| 44 | 44 |
| 45 #define MAX_PUSH 10000000 | 45 #define MAX_PUSH 10000000 |
| 46 | 46 |
| 47 #ifdef ERROR |
| 48 #undef ERROR |
| 49 #endif |
| 47 #define ERROR(str) \ | 50 #define ERROR(str) \ |
| 48 ctxt->error = XML_REGEXP_COMPILE_ERROR; \ | 51 ctxt->error = XML_REGEXP_COMPILE_ERROR; \ |
| 49 xmlRegexpErrCompile(ctxt, str); | 52 xmlRegexpErrCompile(ctxt, str); |
| 50 #define NEXT ctxt->cur++ | 53 #define NEXT ctxt->cur++ |
| 51 #define CUR (*(ctxt->cur)) | 54 #define CUR (*(ctxt->cur)) |
| 52 #define NXT(index) (ctxt->cur[index]) | 55 #define NXT(index) (ctxt->cur[index]) |
| 53 | 56 |
| 54 #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) | 57 #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) |
| 55 #define NEXTL(l) ctxt->cur += l; | 58 #define NEXTL(l) ctxt->cur += l; |
| 56 #define XML_REG_STRING_SEPARATOR '|' | 59 #define XML_REG_STRING_SEPARATOR '|' |
| 57 /* | 60 /* |
| 58 * Need PREV to check on a '-' within a Character Group. May only be used | 61 * Need PREV to check on a '-' within a Character Group. May only be used |
| 59 * when it's guaranteed that cur is not at the beginning of ctxt->string! | 62 * when it's guaranteed that cur is not at the beginning of ctxt->string! |
| 60 */ | 63 */ |
| 61 #define PREV (ctxt->cur[-1]) | 64 #define PREV (ctxt->cur[-1]) |
| 62 | 65 |
| 63 /** | 66 /** |
| 64 * TODO: | 67 * TODO: |
| 65 * | 68 * |
| 66 * macro to flag unimplemented blocks | 69 * macro to flag unimplemented blocks |
| 67 */ | 70 */ |
| 68 #define TODO » » » » » » » » \ | 71 #define TODO» » » » » » » » \ |
| 69 xmlGenericError(xmlGenericErrorContext, \ | 72 xmlGenericError(xmlGenericErrorContext, \ |
| 70 "Unimplemented block at %s:%d\n", \ | 73 "Unimplemented block at %s:%d\n", \ |
| 71 __FILE__, __LINE__); | 74 __FILE__, __LINE__); |
| 72 | 75 |
| 73 /************************************************************************ | 76 /************************************************************************ |
| 74 * » » » » » » » » » * | 77 *» » » » » » » » » * |
| 75 * » » » Datatypes and structures» » » * | 78 *» » » Datatypes and structures» » » * |
| 76 * » » » » » » » » » * | 79 *» » » » » » » » » * |
| 77 ************************************************************************/ | 80 ************************************************************************/ |
| 78 | 81 |
| 79 /* | 82 /* |
| 80 * Note: the order of the enums below is significant, do not shuffle | 83 * Note: the order of the enums below is significant, do not shuffle |
| 81 */ | 84 */ |
| 82 typedef enum { | 85 typedef enum { |
| 83 XML_REGEXP_EPSILON = 1, | 86 XML_REGEXP_EPSILON = 1, |
| 84 XML_REGEXP_CHARVAL, | 87 XML_REGEXP_CHARVAL, |
| 85 XML_REGEXP_RANGES, | 88 XML_REGEXP_RANGES, |
| 86 XML_REGEXP_SUBREG, /* used for () sub regexps */ | 89 XML_REGEXP_SUBREG, /* used for () sub regexps */ |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 212 xmlRegAtomPtr atom; | 215 xmlRegAtomPtr atom; |
| 213 int to; | 216 int to; |
| 214 int counter; | 217 int counter; |
| 215 int count; | 218 int count; |
| 216 int nd; | 219 int nd; |
| 217 }; | 220 }; |
| 218 | 221 |
| 219 struct _xmlAutomataState { | 222 struct _xmlAutomataState { |
| 220 xmlRegStateType type; | 223 xmlRegStateType type; |
| 221 xmlRegMarkedType mark; | 224 xmlRegMarkedType mark; |
| 225 xmlRegMarkedType markd; |
| 222 xmlRegMarkedType reached; | 226 xmlRegMarkedType reached; |
| 223 int no; | 227 int no; |
| 224 int maxTrans; | 228 int maxTrans; |
| 225 int nbTrans; | 229 int nbTrans; |
| 226 xmlRegTrans *trans; | 230 xmlRegTrans *trans; |
| 227 /* knowing states ponting to us can speed things up */ | 231 /* knowing states ponting to us can speed things up */ |
| 228 int maxTransTo; | 232 int maxTransTo; |
| 229 int nbTransTo; | 233 int nbTransTo; |
| 230 int *transTo; | 234 int *transTo; |
| 231 }; | 235 }; |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 354 static void xmlRegFreeAtom(xmlRegAtomPtr atom); | 358 static void xmlRegFreeAtom(xmlRegAtomPtr atom); |
| 355 static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr); | 359 static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr); |
| 356 static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint); | 360 static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint); |
| 357 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, | 361 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, |
| 358 int neg, int start, int end, const xmlChar *blockName); | 362 int neg, int start, int end, const xmlChar *blockName); |
| 359 | 363 |
| 360 void xmlAutomataSetFlags(xmlAutomataPtr am, int flags); | 364 void xmlAutomataSetFlags(xmlAutomataPtr am, int flags); |
| 361 | 365 |
| 362 /************************************************************************ | 366 /************************************************************************ |
| 363 * * | 367 * * |
| 364 * » » Regexp memory error handler» » » » * | 368 *» » Regexp memory error handler» » » » * |
| 365 * * | 369 * * |
| 366 ************************************************************************/ | 370 ************************************************************************/ |
| 367 /** | 371 /** |
| 368 * xmlRegexpErrMemory: | 372 * xmlRegexpErrMemory: |
| 369 * @extra: extra information | 373 * @extra: extra information |
| 370 * | 374 * |
| 371 * Handle an out of memory condition | 375 * Handle an out of memory condition |
| 372 */ | 376 */ |
| 373 static void | 377 static void |
| 374 xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra) | 378 xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra) |
| (...skipping 26 matching lines...) Expand all Loading... |
| 401 idx = ctxt->cur - ctxt->string; | 405 idx = ctxt->cur - ctxt->string; |
| 402 ctxt->error = XML_REGEXP_COMPILE_ERROR; | 406 ctxt->error = XML_REGEXP_COMPILE_ERROR; |
| 403 } | 407 } |
| 404 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, | 408 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, |
| 405 XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra, | 409 XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra, |
| 406 regexp, NULL, idx, 0, | 410 regexp, NULL, idx, 0, |
| 407 "failed to compile: %s\n", extra); | 411 "failed to compile: %s\n", extra); |
| 408 } | 412 } |
| 409 | 413 |
| 410 /************************************************************************ | 414 /************************************************************************ |
| 411 * » » » » » » » » » * | 415 *» » » » » » » » » * |
| 412 * » » » Allocation/Deallocation»» » » * | 416 *» » » Allocation/Deallocation»» » » * |
| 413 * » » » » » » » » » * | 417 *» » » » » » » » » * |
| 414 ************************************************************************/ | 418 ************************************************************************/ |
| 415 | 419 |
| 416 static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); | 420 static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); |
| 417 /** | 421 /** |
| 418 * xmlRegEpxFromParse: | 422 * xmlRegEpxFromParse: |
| 419 * @ctxt: the parser context used to build it | 423 * @ctxt: the parser context used to build it |
| 420 * | 424 * |
| 421 * Allocate a new regexp and fill it with the result from the parser | 425 * Allocate a new regexp and fill it with the result from the parser |
| 422 * | 426 * |
| 423 * Returns the new regexp or NULL in case of error | 427 * Returns the new regexp or NULL in case of error |
| (...skipping 500 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 924 for (i = 0;i < ctxt->nbAtoms;i++) | 928 for (i = 0;i < ctxt->nbAtoms;i++) |
| 925 xmlRegFreeAtom(ctxt->atoms[i]); | 929 xmlRegFreeAtom(ctxt->atoms[i]); |
| 926 xmlFree(ctxt->atoms); | 930 xmlFree(ctxt->atoms); |
| 927 } | 931 } |
| 928 if (ctxt->counters != NULL) | 932 if (ctxt->counters != NULL) |
| 929 xmlFree(ctxt->counters); | 933 xmlFree(ctxt->counters); |
| 930 xmlFree(ctxt); | 934 xmlFree(ctxt); |
| 931 } | 935 } |
| 932 | 936 |
| 933 /************************************************************************ | 937 /************************************************************************ |
| 934 * » » » » » » » » » * | 938 *» » » » » » » » » * |
| 935 * » » » Display of Data structures» » » * | 939 *» » » Display of Data structures» » » * |
| 936 * » » » » » » » » » * | 940 *» » » » » » » » » * |
| 937 ************************************************************************/ | 941 ************************************************************************/ |
| 938 | 942 |
| 939 static void | 943 static void |
| 940 xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { | 944 xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { |
| 941 switch (type) { | 945 switch (type) { |
| 942 case XML_REGEXP_EPSILON: | 946 case XML_REGEXP_EPSILON: |
| 943 fprintf(output, "epsilon "); break; | 947 fprintf(output, "epsilon "); break; |
| 944 case XML_REGEXP_CHARVAL: | 948 case XML_REGEXP_CHARVAL: |
| 945 fprintf(output, "charval "); break; | 949 fprintf(output, "charval "); break; |
| 946 case XML_REGEXP_RANGES: | 950 case XML_REGEXP_RANGES: |
| (...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1133 fprintf(output, "count based %d, ", trans->count); | 1137 fprintf(output, "count based %d, ", trans->count); |
| 1134 } | 1138 } |
| 1135 if (trans->atom == NULL) { | 1139 if (trans->atom == NULL) { |
| 1136 fprintf(output, "epsilon to %d\n", trans->to); | 1140 fprintf(output, "epsilon to %d\n", trans->to); |
| 1137 return; | 1141 return; |
| 1138 } | 1142 } |
| 1139 if (trans->atom->type == XML_REGEXP_CHARVAL) | 1143 if (trans->atom->type == XML_REGEXP_CHARVAL) |
| 1140 fprintf(output, "char %c ", trans->atom->codepoint); | 1144 fprintf(output, "char %c ", trans->atom->codepoint); |
| 1141 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); | 1145 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); |
| 1142 } | 1146 } |
| 1143 | 1147 |
| 1144 static void | 1148 static void |
| 1145 xmlRegPrintState(FILE *output, xmlRegStatePtr state) { | 1149 xmlRegPrintState(FILE *output, xmlRegStatePtr state) { |
| 1146 int i; | 1150 int i; |
| 1147 | 1151 |
| 1148 fprintf(output, " state: "); | 1152 fprintf(output, " state: "); |
| 1149 if (state == NULL) { | 1153 if (state == NULL) { |
| 1150 fprintf(output, "NULL\n"); | 1154 fprintf(output, "NULL\n"); |
| 1151 return; | 1155 return; |
| 1152 } | 1156 } |
| 1153 if (state->type == XML_REGEXP_START_STATE) | 1157 if (state->type == XML_REGEXP_START_STATE) |
| 1154 fprintf(output, "START "); | 1158 fprintf(output, "START "); |
| 1155 if (state->type == XML_REGEXP_FINAL_STATE) | 1159 if (state->type == XML_REGEXP_FINAL_STATE) |
| 1156 fprintf(output, "FINAL "); | 1160 fprintf(output, "FINAL "); |
| 1157 | 1161 |
| 1158 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); | 1162 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); |
| 1159 for (i = 0;i < state->nbTrans; i++) { | 1163 for (i = 0;i < state->nbTrans; i++) { |
| 1160 xmlRegPrintTrans(output, &(state->trans[i])); | 1164 xmlRegPrintTrans(output, &(state->trans[i])); |
| 1161 } | 1165 } |
| 1162 } | 1166 } |
| 1163 | 1167 |
| 1164 #ifdef DEBUG_REGEXP_GRAPH | 1168 #ifdef DEBUG_REGEXP_GRAPH |
| 1165 static void | 1169 static void |
| 1166 xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { | 1170 xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { |
| 1167 int i; | 1171 int i; |
| (...skipping 29 matching lines...) Expand all Loading... |
| 1197 } | 1201 } |
| 1198 fprintf(output, "%d counters:\n", ctxt->nbCounters); | 1202 fprintf(output, "%d counters:\n", ctxt->nbCounters); |
| 1199 for (i = 0;i < ctxt->nbCounters; i++) { | 1203 for (i = 0;i < ctxt->nbCounters; i++) { |
| 1200 fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, | 1204 fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, |
| 1201 ctxt->counters[i].max); | 1205 ctxt->counters[i].max); |
| 1202 } | 1206 } |
| 1203 } | 1207 } |
| 1204 #endif | 1208 #endif |
| 1205 | 1209 |
| 1206 /************************************************************************ | 1210 /************************************************************************ |
| 1207 * » » » » » » » » » * | 1211 *» » » » » » » » » * |
| 1208 * Finite Automata structures manipulations * | 1212 * Finite Automata structures manipulations * |
| 1209 * » » » » » » » » » * | 1213 *» » » » » » » » » * |
| 1210 ************************************************************************/ | 1214 ************************************************************************/ |
| 1211 | 1215 |
| 1212 static void | 1216 static void |
| 1213 xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, | 1217 xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, |
| 1214 int neg, xmlRegAtomType type, int start, int end, | 1218 int neg, xmlRegAtomType type, int start, int end, |
| 1215 xmlChar *blockName) { | 1219 xmlChar *blockName) { |
| 1216 xmlRegRangePtr range; | 1220 xmlRegRangePtr range; |
| 1217 | 1221 |
| 1218 if (atom == NULL) { | 1222 if (atom == NULL) { |
| 1219 ERROR("add range: atom is NULL"); | 1223 ERROR("add range: atom is NULL"); |
| 1220 return; | 1224 return; |
| 1221 } | 1225 } |
| 1222 if (atom->type != XML_REGEXP_RANGES) { | 1226 if (atom->type != XML_REGEXP_RANGES) { |
| (...skipping 19 matching lines...) Expand all Loading... |
| 1242 atom->maxRanges /= 2; | 1246 atom->maxRanges /= 2; |
| 1243 return; | 1247 return; |
| 1244 } | 1248 } |
| 1245 atom->ranges = tmp; | 1249 atom->ranges = tmp; |
| 1246 } | 1250 } |
| 1247 range = xmlRegNewRange(ctxt, neg, type, start, end); | 1251 range = xmlRegNewRange(ctxt, neg, type, start, end); |
| 1248 if (range == NULL) | 1252 if (range == NULL) |
| 1249 return; | 1253 return; |
| 1250 range->blockName = blockName; | 1254 range->blockName = blockName; |
| 1251 atom->ranges[atom->nbRanges++] = range; | 1255 atom->ranges[atom->nbRanges++] = range; |
| 1252 | 1256 |
| 1253 } | 1257 } |
| 1254 | 1258 |
| 1255 static int | 1259 static int |
| 1256 xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { | 1260 xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { |
| 1257 if (ctxt->maxCounters == 0) { | 1261 if (ctxt->maxCounters == 0) { |
| 1258 ctxt->maxCounters = 4; | 1262 ctxt->maxCounters = 4; |
| 1259 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * | 1263 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * |
| 1260 sizeof(xmlRegCounter)); | 1264 sizeof(xmlRegCounter)); |
| 1261 if (ctxt->counters == NULL) { | 1265 if (ctxt->counters == NULL) { |
| 1262 xmlRegexpErrMemory(ctxt, "allocating counter"); | 1266 xmlRegexpErrMemory(ctxt, "allocating counter"); |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1273 ctxt->maxCounters /= 2; | 1277 ctxt->maxCounters /= 2; |
| 1274 return(-1); | 1278 return(-1); |
| 1275 } | 1279 } |
| 1276 ctxt->counters = tmp; | 1280 ctxt->counters = tmp; |
| 1277 } | 1281 } |
| 1278 ctxt->counters[ctxt->nbCounters].min = -1; | 1282 ctxt->counters[ctxt->nbCounters].min = -1; |
| 1279 ctxt->counters[ctxt->nbCounters].max = -1; | 1283 ctxt->counters[ctxt->nbCounters].max = -1; |
| 1280 return(ctxt->nbCounters++); | 1284 return(ctxt->nbCounters++); |
| 1281 } | 1285 } |
| 1282 | 1286 |
| 1283 static int | 1287 static int |
| 1284 xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { | 1288 xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { |
| 1285 if (atom == NULL) { | 1289 if (atom == NULL) { |
| 1286 ERROR("atom push: atom is NULL"); | 1290 ERROR("atom push: atom is NULL"); |
| 1287 return(-1); | 1291 return(-1); |
| 1288 } | 1292 } |
| 1289 if (ctxt->maxAtoms == 0) { | 1293 if (ctxt->maxAtoms == 0) { |
| 1290 ctxt->maxAtoms = 4; | 1294 ctxt->maxAtoms = 4; |
| 1291 ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * | 1295 ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * |
| 1292 sizeof(xmlRegAtomPtr)); | 1296 sizeof(xmlRegAtomPtr)); |
| 1293 if (ctxt->atoms == NULL) { | 1297 if (ctxt->atoms == NULL) { |
| (...skipping 11 matching lines...) Expand all Loading... |
| 1305 ctxt->maxAtoms /= 2; | 1309 ctxt->maxAtoms /= 2; |
| 1306 return(-1); | 1310 return(-1); |
| 1307 } | 1311 } |
| 1308 ctxt->atoms = tmp; | 1312 ctxt->atoms = tmp; |
| 1309 } | 1313 } |
| 1310 atom->no = ctxt->nbAtoms; | 1314 atom->no = ctxt->nbAtoms; |
| 1311 ctxt->atoms[ctxt->nbAtoms++] = atom; | 1315 ctxt->atoms[ctxt->nbAtoms++] = atom; |
| 1312 return(0); | 1316 return(0); |
| 1313 } | 1317 } |
| 1314 | 1318 |
| 1315 static void | 1319 static void |
| 1316 xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target, | 1320 xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target, |
| 1317 int from) { | 1321 int from) { |
| 1318 if (target->maxTransTo == 0) { | 1322 if (target->maxTransTo == 0) { |
| 1319 target->maxTransTo = 8; | 1323 target->maxTransTo = 8; |
| 1320 target->transTo = (int *) xmlMalloc(target->maxTransTo * | 1324 target->transTo = (int *) xmlMalloc(target->maxTransTo * |
| 1321 sizeof(int)); | 1325 sizeof(int)); |
| 1322 if (target->transTo == NULL) { | 1326 if (target->transTo == NULL) { |
| 1323 xmlRegexpErrMemory(ctxt, "adding transition"); | 1327 xmlRegexpErrMemory(ctxt, "adding transition"); |
| 1324 target->maxTransTo = 0; | 1328 target->maxTransTo = 0; |
| 1325 return; | 1329 return; |
| 1326 } | 1330 } |
| 1327 } else if (target->nbTransTo >= target->maxTransTo) { | 1331 } else if (target->nbTransTo >= target->maxTransTo) { |
| 1328 int *tmp; | 1332 int *tmp; |
| 1329 target->maxTransTo *= 2; | 1333 target->maxTransTo *= 2; |
| 1330 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo * | 1334 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo * |
| 1331 sizeof(int)); | 1335 sizeof(int)); |
| 1332 if (tmp == NULL) { | 1336 if (tmp == NULL) { |
| 1333 xmlRegexpErrMemory(ctxt, "adding transition"); | 1337 xmlRegexpErrMemory(ctxt, "adding transition"); |
| 1334 target->maxTransTo /= 2; | 1338 target->maxTransTo /= 2; |
| 1335 return; | 1339 return; |
| 1336 } | 1340 } |
| 1337 target->transTo = tmp; | 1341 target->transTo = tmp; |
| 1338 } | 1342 } |
| 1339 target->transTo[target->nbTransTo] = from; | 1343 target->transTo[target->nbTransTo] = from; |
| 1340 target->nbTransTo++; | 1344 target->nbTransTo++; |
| 1341 } | 1345 } |
| 1342 | 1346 |
| 1343 static void | 1347 static void |
| 1344 xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, | 1348 xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, |
| 1345 xmlRegAtomPtr atom, xmlRegStatePtr target, | 1349 xmlRegAtomPtr atom, xmlRegStatePtr target, |
| 1346 int counter, int count) { | 1350 int counter, int count) { |
| 1347 | 1351 |
| 1348 int nrtrans; | 1352 int nrtrans; |
| 1349 | 1353 |
| 1350 if (state == NULL) { | 1354 if (state == NULL) { |
| 1351 ERROR("add state: state is NULL"); | 1355 ERROR("add state: state is NULL"); |
| 1352 return; | 1356 return; |
| 1353 } | 1357 } |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1399 #ifdef DEBUG_REGEXP_GRAPH | 1403 #ifdef DEBUG_REGEXP_GRAPH |
| 1400 printf("Add trans from %d to %d ", state->no, target->no); | 1404 printf("Add trans from %d to %d ", state->no, target->no); |
| 1401 if (count == REGEXP_ALL_COUNTER) | 1405 if (count == REGEXP_ALL_COUNTER) |
| 1402 printf("all transition\n"); | 1406 printf("all transition\n"); |
| 1403 else if (count >= 0) | 1407 else if (count >= 0) |
| 1404 printf("count based %d\n", count); | 1408 printf("count based %d\n", count); |
| 1405 else if (counter >= 0) | 1409 else if (counter >= 0) |
| 1406 printf("counted %d\n", counter); | 1410 printf("counted %d\n", counter); |
| 1407 else if (atom == NULL) | 1411 else if (atom == NULL) |
| 1408 printf("epsilon transition\n"); | 1412 printf("epsilon transition\n"); |
| 1409 else if (atom != NULL) | 1413 else if (atom != NULL) |
| 1410 xmlRegPrintAtom(stdout, atom); | 1414 xmlRegPrintAtom(stdout, atom); |
| 1411 #endif | 1415 #endif |
| 1412 | 1416 |
| 1413 state->trans[state->nbTrans].atom = atom; | 1417 state->trans[state->nbTrans].atom = atom; |
| 1414 state->trans[state->nbTrans].to = target->no; | 1418 state->trans[state->nbTrans].to = target->no; |
| 1415 state->trans[state->nbTrans].counter = counter; | 1419 state->trans[state->nbTrans].counter = counter; |
| 1416 state->trans[state->nbTrans].count = count; | 1420 state->trans[state->nbTrans].count = count; |
| 1417 state->trans[state->nbTrans].nd = 0; | 1421 state->trans[state->nbTrans].nd = 0; |
| 1418 state->nbTrans++; | 1422 state->nbTrans++; |
| 1419 xmlRegStateAddTransTo(ctxt, target, state->no); | 1423 xmlRegStateAddTransTo(ctxt, target, state->no); |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1553 if (xmlRegAtomPush(ctxt, atom) < 0) { | 1557 if (xmlRegAtomPush(ctxt, atom) < 0) { |
| 1554 return(-1); | 1558 return(-1); |
| 1555 } | 1559 } |
| 1556 if ((to != NULL) && (atom->stop != to) && | 1560 if ((to != NULL) && (atom->stop != to) && |
| 1557 (atom->quant != XML_REGEXP_QUANT_RANGE)) { | 1561 (atom->quant != XML_REGEXP_QUANT_RANGE)) { |
| 1558 /* | 1562 /* |
| 1559 * Generate an epsilon transition to link to the target | 1563 * Generate an epsilon transition to link to the target |
| 1560 */ | 1564 */ |
| 1561 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); | 1565 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); |
| 1562 #ifdef DV | 1566 #ifdef DV |
| 1563 » } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && | 1567 » } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && |
| 1564 (atom->quant != XML_REGEXP_QUANT_ONCE)) { | 1568 (atom->quant != XML_REGEXP_QUANT_ONCE)) { |
| 1565 to = xmlRegNewState(ctxt); | 1569 to = xmlRegNewState(ctxt); |
| 1566 xmlRegStatePush(ctxt, to); | 1570 xmlRegStatePush(ctxt, to); |
| 1567 ctxt->state = to; | 1571 ctxt->state = to; |
| 1568 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); | 1572 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); |
| 1569 #endif | 1573 #endif |
| 1570 } | 1574 } |
| 1571 switch (atom->quant) { | 1575 switch (atom->quant) { |
| 1572 case XML_REGEXP_QUANT_OPT: | 1576 case XML_REGEXP_QUANT_OPT: |
| 1573 atom->quant = XML_REGEXP_QUANT_ONCE; | 1577 atom->quant = XML_REGEXP_QUANT_ONCE; |
| 1574 /* | 1578 /* |
| 1575 * transition done to the state after end of atom. | 1579 * transition done to the state after end of atom. |
| 1576 * 1. set transition from atom start to new state | 1580 * 1. set transition from atom start to new state |
| 1577 » » * 2. set transition from atom end to this state. | 1581 » » * 2. set transition from atom end to this state. |
| 1578 */ | 1582 */ |
| 1579 if (to == NULL) { | 1583 if (to == NULL) { |
| 1580 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0); | 1584 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0); |
| 1581 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, | 1585 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, |
| 1582 ctxt->state); | 1586 ctxt->state); |
| 1583 } else { | 1587 } else { |
| 1584 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to); | 1588 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to); |
| 1585 } | 1589 } |
| 1586 break; | 1590 break; |
| 1587 case XML_REGEXP_QUANT_MULT: | 1591 case XML_REGEXP_QUANT_MULT: |
| (...skipping 23 matching lines...) Expand all Loading... |
| 1611 * The principle here is to use counted transition | 1615 * The principle here is to use counted transition |
| 1612 * to avoid explosion in the number of states in the | 1616 * to avoid explosion in the number of states in the |
| 1613 * graph. This is clearly more complex but should not | 1617 * graph. This is clearly more complex but should not |
| 1614 * be exploitable at runtime. | 1618 * be exploitable at runtime. |
| 1615 */ | 1619 */ |
| 1616 if ((atom->min == 0) && (atom->start0 == NULL)) { | 1620 if ((atom->min == 0) && (atom->start0 == NULL)) { |
| 1617 xmlRegAtomPtr copy; | 1621 xmlRegAtomPtr copy; |
| 1618 /* | 1622 /* |
| 1619 * duplicate a transition based on atom to count next | 1623 * duplicate a transition based on atom to count next |
| 1620 * occurences after 1. We cannot loop to atom->start | 1624 * occurences after 1. We cannot loop to atom->start |
| 1621 » » * directly because we need an epsilon transition to | 1625 » » * directly because we need an epsilon transition to |
| 1622 * newstate. | 1626 * newstate. |
| 1623 */ | 1627 */ |
| 1624 /* ???? For some reason it seems we never reach that | 1628 /* ???? For some reason it seems we never reach that |
| 1625 case, I suppose this got optimized out before when | 1629 case, I suppose this got optimized out before when |
| 1626 building the automata */ | 1630 building the automata */ |
| 1627 copy = xmlRegCopyAtom(ctxt, atom); | 1631 copy = xmlRegCopyAtom(ctxt, atom); |
| 1628 if (copy == NULL) | 1632 if (copy == NULL) |
| 1629 return(-1); | 1633 return(-1); |
| 1630 copy->quant = XML_REGEXP_QUANT_ONCE; | 1634 copy->quant = XML_REGEXP_QUANT_ONCE; |
| 1631 copy->min = 0; | 1635 copy->min = 0; |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1670 } | 1674 } |
| 1671 atom->min = 0; | 1675 atom->min = 0; |
| 1672 atom->max = 0; | 1676 atom->max = 0; |
| 1673 atom->quant = XML_REGEXP_QUANT_ONCE; | 1677 atom->quant = XML_REGEXP_QUANT_ONCE; |
| 1674 ctxt->state = newstate; | 1678 ctxt->state = newstate; |
| 1675 } | 1679 } |
| 1676 default: | 1680 default: |
| 1677 break; | 1681 break; |
| 1678 } | 1682 } |
| 1679 return(0); | 1683 return(0); |
| 1680 } | 1684 } |
| 1681 if ((atom->min == 0) && (atom->max == 0) && | 1685 if ((atom->min == 0) && (atom->max == 0) && |
| 1682 (atom->quant == XML_REGEXP_QUANT_RANGE)) { | 1686 (atom->quant == XML_REGEXP_QUANT_RANGE)) { |
| 1683 /* | 1687 /* |
| 1684 * we can discard the atom and generate an epsilon transition instead | 1688 * we can discard the atom and generate an epsilon transition instead |
| 1685 */ | 1689 */ |
| 1686 if (to == NULL) { | 1690 if (to == NULL) { |
| 1687 to = xmlRegNewState(ctxt); | 1691 to = xmlRegNewState(ctxt); |
| 1688 if (to != NULL) | 1692 if (to != NULL) |
| 1689 xmlRegStatePush(ctxt, to); | 1693 xmlRegStatePush(ctxt, to); |
| 1690 else { | 1694 else { |
| 1691 return(-1); | 1695 return(-1); |
| 1692 } | 1696 } |
| 1693 } | 1697 } |
| 1694 xmlFAGenerateEpsilonTransition(ctxt, from, to); | 1698 xmlFAGenerateEpsilonTransition(ctxt, from, to); |
| 1695 ctxt->state = to; | 1699 ctxt->state = to; |
| 1696 xmlRegFreeAtom(atom); | 1700 xmlRegFreeAtom(atom); |
| 1697 return(0); | 1701 return(0); |
| 1698 } | 1702 } |
| 1699 if (to == NULL) { | 1703 if (to == NULL) { |
| 1700 to = xmlRegNewState(ctxt); | 1704 to = xmlRegNewState(ctxt); |
| 1701 if (to != NULL) | 1705 if (to != NULL) |
| 1702 xmlRegStatePush(ctxt, to); | 1706 xmlRegStatePush(ctxt, to); |
| 1703 else { | 1707 else { |
| 1704 return(-1); | 1708 return(-1); |
| 1705 } | 1709 } |
| 1706 } | 1710 } |
| 1707 end = to; | 1711 end = to; |
| 1708 if ((atom->quant == XML_REGEXP_QUANT_MULT) || | 1712 if ((atom->quant == XML_REGEXP_QUANT_MULT) || |
| 1709 (atom->quant == XML_REGEXP_QUANT_PLUS)) { | 1713 (atom->quant == XML_REGEXP_QUANT_PLUS)) { |
| 1710 /* | 1714 /* |
| 1711 * Do not pollute the target state by adding transitions from | 1715 * Do not pollute the target state by adding transitions from |
| 1712 * it as it is likely to be the shared target of multiple branches. | 1716 * it as it is likely to be the shared target of multiple branches. |
| 1713 * So isolate with an epsilon transition. | 1717 * So isolate with an epsilon transition. |
| 1714 */ | 1718 */ |
| 1715 xmlRegStatePtr tmp; | 1719 xmlRegStatePtr tmp; |
| 1716 » | 1720 |
| 1717 tmp = xmlRegNewState(ctxt); | 1721 tmp = xmlRegNewState(ctxt); |
| 1718 if (tmp != NULL) | 1722 if (tmp != NULL) |
| 1719 xmlRegStatePush(ctxt, tmp); | 1723 xmlRegStatePush(ctxt, tmp); |
| 1720 else { | 1724 else { |
| 1721 return(-1); | 1725 return(-1); |
| 1722 } | 1726 } |
| 1723 xmlFAGenerateEpsilonTransition(ctxt, tmp, to); | 1727 xmlFAGenerateEpsilonTransition(ctxt, tmp, to); |
| 1724 to = tmp; | 1728 to = tmp; |
| 1725 } | 1729 } |
| 1726 if (xmlRegAtomPush(ctxt, atom) < 0) { | 1730 if (xmlRegAtomPush(ctxt, atom) < 0) { |
| 1727 return(-1); | 1731 return(-1); |
| 1728 } | 1732 } |
| 1729 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); | 1733 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); |
| 1730 ctxt->state = end; | 1734 ctxt->state = end; |
| 1731 switch (atom->quant) { | 1735 switch (atom->quant) { |
| 1732 case XML_REGEXP_QUANT_OPT: | 1736 case XML_REGEXP_QUANT_OPT: |
| 1733 atom->quant = XML_REGEXP_QUANT_ONCE; | 1737 atom->quant = XML_REGEXP_QUANT_ONCE; |
| 1734 xmlFAGenerateEpsilonTransition(ctxt, from, to); | 1738 xmlFAGenerateEpsilonTransition(ctxt, from, to); |
| 1735 break; | 1739 break; |
| 1736 case XML_REGEXP_QUANT_MULT: | 1740 case XML_REGEXP_QUANT_MULT: |
| 1737 atom->quant = XML_REGEXP_QUANT_ONCE; | 1741 atom->quant = XML_REGEXP_QUANT_ONCE; |
| 1738 xmlFAGenerateEpsilonTransition(ctxt, from, to); | 1742 xmlFAGenerateEpsilonTransition(ctxt, from, to); |
| 1739 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); | 1743 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); |
| 1740 break; | 1744 break; |
| 1741 case XML_REGEXP_QUANT_PLUS: | 1745 case XML_REGEXP_QUANT_PLUS: |
| 1742 atom->quant = XML_REGEXP_QUANT_ONCE; | 1746 atom->quant = XML_REGEXP_QUANT_ONCE; |
| 1743 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); | 1747 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); |
| 1744 break; | 1748 break; |
| 1745 » case XML_REGEXP_QUANT_RANGE: | 1749 » case XML_REGEXP_QUANT_RANGE: |
| 1746 #if DV_test | 1750 #if DV_test |
| 1747 if (atom->min == 0) { | 1751 if (atom->min == 0) { |
| 1748 xmlFAGenerateEpsilonTransition(ctxt, from, to); | 1752 xmlFAGenerateEpsilonTransition(ctxt, from, to); |
| 1749 } | 1753 } |
| 1750 #endif | 1754 #endif |
| 1751 break; | 1755 break; |
| 1752 default: | 1756 default: |
| 1753 break; | 1757 break; |
| 1754 } | 1758 } |
| 1755 return(0); | 1759 return(0); |
| 1756 } | 1760 } |
| 1757 | 1761 |
| 1758 /** | 1762 /** |
| 1759 * xmlFAReduceEpsilonTransitions: | 1763 * xmlFAReduceEpsilonTransitions: |
| 1760 * @ctxt: a regexp parser context | 1764 * @ctxt: a regexp parser context |
| 1761 * @fromnr: the from state | 1765 * @fromnr: the from state |
| 1762 * @tonr: the to state | 1766 * @tonr: the to state |
| 1763 * @counter: should that transition be associated to a counted | 1767 * @counter: should that transition be associated to a counted |
| 1764 * | 1768 * |
| 1765 */ | 1769 */ |
| 1766 static void | 1770 static void |
| 1767 xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, | 1771 xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, |
| 1768 int tonr, int counter) { | 1772 int tonr, int counter) { |
| 1769 int transnr; | 1773 int transnr; |
| 1770 xmlRegStatePtr from; | 1774 xmlRegStatePtr from; |
| 1771 xmlRegStatePtr to; | 1775 xmlRegStatePtr to; |
| 1772 | 1776 |
| (...skipping 23 matching lines...) Expand all Loading... |
| 1796 if (to->trans[transnr].atom == NULL) { | 1800 if (to->trans[transnr].atom == NULL) { |
| 1797 /* | 1801 /* |
| 1798 * Don't remove counted transitions | 1802 * Don't remove counted transitions |
| 1799 * Don't loop either | 1803 * Don't loop either |
| 1800 */ | 1804 */ |
| 1801 if (to->trans[transnr].to != fromnr) { | 1805 if (to->trans[transnr].to != fromnr) { |
| 1802 if (to->trans[transnr].count >= 0) { | 1806 if (to->trans[transnr].count >= 0) { |
| 1803 int newto = to->trans[transnr].to; | 1807 int newto = to->trans[transnr].to; |
| 1804 | 1808 |
| 1805 xmlRegStateAddTrans(ctxt, from, NULL, | 1809 xmlRegStateAddTrans(ctxt, from, NULL, |
| 1806 » » » » » ctxt->states[newto], | 1810 » » » » » ctxt->states[newto], |
| 1807 -1, to->trans[transnr].count); | 1811 -1, to->trans[transnr].count); |
| 1808 } else { | 1812 } else { |
| 1809 #ifdef DEBUG_REGEXP_GRAPH | 1813 #ifdef DEBUG_REGEXP_GRAPH |
| 1810 printf("Found epsilon trans %d from %d to %d\n", | 1814 printf("Found epsilon trans %d from %d to %d\n", |
| 1811 transnr, tonr, to->trans[transnr].to); | 1815 transnr, tonr, to->trans[transnr].to); |
| 1812 #endif | 1816 #endif |
| 1813 if (to->trans[transnr].counter >= 0) { | 1817 if (to->trans[transnr].counter >= 0) { |
| 1814 xmlFAReduceEpsilonTransitions(ctxt, fromnr, | 1818 xmlFAReduceEpsilonTransitions(ctxt, fromnr, |
| 1815 to->trans[transnr].to, | 1819 to->trans[transnr].to, |
| 1816 to->trans[transnr].counter); | 1820 to->trans[transnr].counter); |
| 1817 } else { | 1821 } else { |
| 1818 xmlFAReduceEpsilonTransitions(ctxt, fromnr, | 1822 xmlFAReduceEpsilonTransitions(ctxt, fromnr, |
| 1819 to->trans[transnr].to, | 1823 to->trans[transnr].to, |
| 1820 counter); | 1824 counter); |
| 1821 } | 1825 } |
| 1822 } | 1826 } |
| 1823 } | 1827 } |
| 1824 } else { | 1828 } else { |
| 1825 int newto = to->trans[transnr].to; | 1829 int newto = to->trans[transnr].to; |
| 1826 | 1830 |
| 1827 if (to->trans[transnr].counter >= 0) { | 1831 if (to->trans[transnr].counter >= 0) { |
| 1828 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, | 1832 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, |
| 1829 » » » » ctxt->states[newto], | 1833 » » » » ctxt->states[newto], |
| 1830 to->trans[transnr].counter, -1); | 1834 to->trans[transnr].counter, -1); |
| 1831 } else { | 1835 } else { |
| 1832 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, | 1836 » » xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, |
| 1833 ctxt->states[newto], counter, -1); | 1837 ctxt->states[newto], counter, -1); |
| 1834 } | 1838 } |
| 1835 } | 1839 } |
| 1836 } | 1840 } |
| 1837 to->mark = XML_REGEXP_MARK_NORMAL; | 1841 to->mark = XML_REGEXP_MARK_NORMAL; |
| 1838 } | 1842 } |
| 1839 | 1843 |
| 1840 /** | 1844 /** |
| 1841 * xmlFAEliminateSimpleEpsilonTransitions: | 1845 * xmlFAEliminateSimpleEpsilonTransitions: |
| 1842 * @ctxt: a regexp parser context | 1846 * @ctxt: a regexp parser context |
| 1843 * | 1847 * |
| 1844 * Eliminating general epsilon transitions can get costly in the general | 1848 * Eliminating general epsilon transitions can get costly in the general |
| 1845 * algorithm due to the large amount of generated new transitions and | 1849 * algorithm due to the large amount of generated new transitions and |
| 1846 * associated comparisons. However for simple epsilon transition used just | 1850 * associated comparisons. However for simple epsilon transition used just |
| 1847 * to separate building blocks when generating the automata this can be | 1851 * to separate building blocks when generating the automata this can be |
| 1848 * reduced to state elimination: | 1852 * reduced to state elimination: |
| 1849 * - if there exists an epsilon from X to Y | 1853 * - if there exists an epsilon from X to Y |
| 1850 * - if there is no other transition from X | 1854 * - if there is no other transition from X |
| 1851 * then X and Y are semantically equivalent and X can be eliminated | 1855 * then X and Y are semantically equivalent and X can be eliminated |
| 1852 * If X is the start state then make Y the start state, else replace the | 1856 * If X is the start state then make Y the start state, else replace the |
| 1853 * target of all transitions to X by transitions to Y. | 1857 * target of all transitions to X by transitions to Y. |
| 1854 */ | 1858 */ |
| (...skipping 15 matching lines...) Expand all Loading... |
| 1870 (state->trans[0].to >= 0) && | 1874 (state->trans[0].to >= 0) && |
| 1871 (state->trans[0].to != statenr) && | 1875 (state->trans[0].to != statenr) && |
| 1872 (state->trans[0].counter < 0) && | 1876 (state->trans[0].counter < 0) && |
| 1873 (state->trans[0].count < 0)) { | 1877 (state->trans[0].count < 0)) { |
| 1874 newto = state->trans[0].to; | 1878 newto = state->trans[0].to; |
| 1875 | 1879 |
| 1876 if (state->type == XML_REGEXP_START_STATE) { | 1880 if (state->type == XML_REGEXP_START_STATE) { |
| 1877 #ifdef DEBUG_REGEXP_GRAPH | 1881 #ifdef DEBUG_REGEXP_GRAPH |
| 1878 printf("Found simple epsilon trans from start %d to %d\n", | 1882 printf("Found simple epsilon trans from start %d to %d\n", |
| 1879 statenr, newto); | 1883 statenr, newto); |
| 1880 #endif | 1884 #endif |
| 1881 } else { | 1885 } else { |
| 1882 #ifdef DEBUG_REGEXP_GRAPH | 1886 #ifdef DEBUG_REGEXP_GRAPH |
| 1883 printf("Found simple epsilon trans from %d to %d\n", | 1887 printf("Found simple epsilon trans from %d to %d\n", |
| 1884 statenr, newto); | 1888 statenr, newto); |
| 1885 #endif | 1889 #endif |
| 1886 for (i = 0;i < state->nbTransTo;i++) { | 1890 for (i = 0;i < state->nbTransTo;i++) { |
| 1887 tmp = ctxt->states[state->transTo[i]]; | 1891 tmp = ctxt->states[state->transTo[i]]; |
| 1888 for (j = 0;j < tmp->nbTrans;j++) { | 1892 for (j = 0;j < tmp->nbTrans;j++) { |
| 1889 if (tmp->trans[j].to == statenr) { | 1893 if (tmp->trans[j].to == statenr) { |
| 1890 #ifdef DEBUG_REGEXP_GRAPH | 1894 #ifdef DEBUG_REGEXP_GRAPH |
| 1891 printf("Changed transition %d on %d to go to %d\n", | 1895 printf("Changed transition %d on %d to go to %d\n", |
| 1892 j, tmp->no, newto); | 1896 j, tmp->no, newto); |
| 1893 #endif | 1897 #endif |
| 1894 tmp->trans[j].to = -1; | 1898 tmp->trans[j].to = -1; |
| 1895 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom, | 1899 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom, |
| 1896 » » » » » » ctxt->states[newto], | 1900 » » » » » » ctxt->states[newto], |
| 1897 tmp->trans[j].counter, | 1901 tmp->trans[j].counter, |
| 1898 tmp->trans[j].count); | 1902 tmp->trans[j].count); |
| 1899 } | 1903 } |
| 1900 } | 1904 } |
| 1901 } | 1905 } |
| 1902 if (state->type == XML_REGEXP_FINAL_STATE) | 1906 if (state->type == XML_REGEXP_FINAL_STATE) |
| 1903 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; | 1907 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; |
| 1904 /* eliminate the transition completely */ | 1908 /* eliminate the transition completely */ |
| 1905 state->nbTrans = 0; | 1909 state->nbTrans = 0; |
| 1906 | 1910 |
| 1907 state->type = XML_REGEXP_UNREACH_STATE; | 1911 state->type = XML_REGEXP_UNREACH_STATE; |
| 1908 | 1912 |
| 1909 } | 1913 } |
| 1910 | 1914 |
| 1911 } | 1915 } |
| 1912 } | 1916 } |
| 1913 } | 1917 } |
| 1914 /** | 1918 /** |
| 1915 * xmlFAEliminateEpsilonTransitions: | 1919 * xmlFAEliminateEpsilonTransitions: |
| 1916 * @ctxt: a regexp parser context | 1920 * @ctxt: a regexp parser context |
| 1917 * | 1921 * |
| 1918 */ | 1922 */ |
| 1919 static void | 1923 static void |
| 1920 xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { | 1924 xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { |
| (...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2102 ret = 0; | 2106 ret = 0; |
| 2103 else | 2107 else |
| 2104 ret = 1; | 2108 ret = 1; |
| 2105 } else if (range1->type == XML_REGEXP_CHARVAL) { | 2109 } else if (range1->type == XML_REGEXP_CHARVAL) { |
| 2106 int codepoint; | 2110 int codepoint; |
| 2107 int neg = 0; | 2111 int neg = 0; |
| 2108 | 2112 |
| 2109 /* | 2113 /* |
| 2110 * just check all codepoints in the range for acceptance, | 2114 * just check all codepoints in the range for acceptance, |
| 2111 * this is usually way cheaper since done only once at | 2115 * this is usually way cheaper since done only once at |
| 2112 » * compilation than testing over and over at runtime or | 2116 » * compilation than testing over and over at runtime or |
| 2113 * pushing too many states when evaluating. | 2117 * pushing too many states when evaluating. |
| 2114 */ | 2118 */ |
| 2115 if (((range1->neg == 0) && (range2->neg != 0)) || | 2119 if (((range1->neg == 0) && (range2->neg != 0)) || |
| 2116 ((range1->neg != 0) && (range2->neg == 0))) | 2120 ((range1->neg != 0) && (range2->neg == 0))) |
| 2117 neg = 1; | 2121 neg = 1; |
| 2118 | 2122 |
| 2119 for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) { | 2123 for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) { |
| 2120 ret = xmlRegCheckCharacterRange(range2->type, codepoint, | 2124 ret = xmlRegCheckCharacterRange(range2->type, codepoint, |
| 2121 0, range2->start, range2->end, | 2125 0, range2->start, range2->end, |
| 2122 range2->blockName); | 2126 range2->blockName); |
| (...skipping 456 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2579 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, | 2583 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, |
| 2580 int to, xmlRegAtomPtr atom) { | 2584 int to, xmlRegAtomPtr atom) { |
| 2581 int ret = 1; | 2585 int ret = 1; |
| 2582 int res; | 2586 int res; |
| 2583 int transnr, nbTrans; | 2587 int transnr, nbTrans; |
| 2584 xmlRegTransPtr t1; | 2588 xmlRegTransPtr t1; |
| 2585 int deep = 1; | 2589 int deep = 1; |
| 2586 | 2590 |
| 2587 if (state == NULL) | 2591 if (state == NULL) |
| 2588 return(ret); | 2592 return(ret); |
| 2593 if (state->markd == XML_REGEXP_MARK_VISITED) |
| 2594 return(ret); |
| 2589 | 2595 |
| 2590 if (ctxt->flags & AM_AUTOMATA_RNG) | 2596 if (ctxt->flags & AM_AUTOMATA_RNG) |
| 2591 deep = 0; | 2597 deep = 0; |
| 2592 | 2598 |
| 2593 /* | 2599 /* |
| 2594 * don't recurse on transitions potentially added in the course of | 2600 * don't recurse on transitions potentially added in the course of |
| 2595 * the elimination. | 2601 * the elimination. |
| 2596 */ | 2602 */ |
| 2597 nbTrans = state->nbTrans; | 2603 nbTrans = state->nbTrans; |
| 2598 for (transnr = 0;transnr < nbTrans;transnr++) { | 2604 for (transnr = 0;transnr < nbTrans;transnr++) { |
| 2599 t1 = &(state->trans[transnr]); | 2605 t1 = &(state->trans[transnr]); |
| 2600 /* | 2606 /* |
| 2601 * check transitions conflicting with the one looked at | 2607 * check transitions conflicting with the one looked at |
| 2602 */ | 2608 */ |
| 2603 if (t1->atom == NULL) { | 2609 if (t1->atom == NULL) { |
| 2604 if (t1->to < 0) | 2610 if (t1->to < 0) |
| 2605 continue; | 2611 continue; |
| 2612 state->markd = XML_REGEXP_MARK_VISITED; |
| 2606 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], | 2613 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], |
| 2607 to, atom); | 2614 to, atom); |
| 2615 state->markd = 0; |
| 2608 if (res == 0) { | 2616 if (res == 0) { |
| 2609 ret = 0; | 2617 ret = 0; |
| 2610 /* t1->nd = 1; */ | 2618 /* t1->nd = 1; */ |
| 2611 } | 2619 } |
| 2612 continue; | 2620 continue; |
| 2613 } | 2621 } |
| 2614 if (t1->to != to) | 2622 if (t1->to != to) |
| 2615 continue; | 2623 continue; |
| 2616 if (xmlFACompareAtoms(t1->atom, atom, deep)) { | 2624 if (xmlFACompareAtoms(t1->atom, atom, deep)) { |
| 2617 ret = 0; | 2625 ret = 0; |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2766 transition get marked down | 2774 transition get marked down |
| 2767 if (ret == 0) | 2775 if (ret == 0) |
| 2768 break; */ | 2776 break; */ |
| 2769 } | 2777 } |
| 2770 | 2778 |
| 2771 ctxt->determinist = ret; | 2779 ctxt->determinist = ret; |
| 2772 return(ret); | 2780 return(ret); |
| 2773 } | 2781 } |
| 2774 | 2782 |
| 2775 /************************************************************************ | 2783 /************************************************************************ |
| 2776 * » » » » » » » » » * | 2784 *» » » » » » » » » * |
| 2777 * Routines to check input against transition atoms * | 2785 * Routines to check input against transition atoms * |
| 2778 * » » » » » » » » » * | 2786 *» » » » » » » » » * |
| 2779 ************************************************************************/ | 2787 ************************************************************************/ |
| 2780 | 2788 |
| 2781 static int | 2789 static int |
| 2782 xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, | 2790 xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, |
| 2783 int start, int end, const xmlChar *blockName) { | 2791 int start, int end, const xmlChar *blockName) { |
| 2784 int ret = 0; | 2792 int ret = 0; |
| 2785 | 2793 |
| 2786 switch (type) { | 2794 switch (type) { |
| 2787 case XML_REGEXP_STRING: | 2795 case XML_REGEXP_STRING: |
| 2788 case XML_REGEXP_SUBREG: | 2796 case XML_REGEXP_SUBREG: |
| 2789 case XML_REGEXP_RANGES: | 2797 case XML_REGEXP_RANGES: |
| 2790 case XML_REGEXP_EPSILON: | 2798 case XML_REGEXP_EPSILON: |
| 2791 return(-1); | 2799 return(-1); |
| 2792 case XML_REGEXP_ANYCHAR: | 2800 case XML_REGEXP_ANYCHAR: |
| 2793 ret = ((codepoint != '\n') && (codepoint != '\r')); | 2801 ret = ((codepoint != '\n') && (codepoint != '\r')); |
| 2794 break; | 2802 break; |
| 2795 case XML_REGEXP_CHARVAL: | 2803 case XML_REGEXP_CHARVAL: |
| 2796 ret = ((codepoint >= start) && (codepoint <= end)); | 2804 ret = ((codepoint >= start) && (codepoint <= end)); |
| 2797 break; | 2805 break; |
| 2798 case XML_REGEXP_NOTSPACE: | 2806 case XML_REGEXP_NOTSPACE: |
| 2799 neg = !neg; | 2807 neg = !neg; |
| 2800 case XML_REGEXP_ANYSPACE: | 2808 case XML_REGEXP_ANYSPACE: |
| 2801 ret = ((codepoint == '\n') || (codepoint == '\r') || | 2809 ret = ((codepoint == '\n') || (codepoint == '\r') || |
| 2802 (codepoint == '\t') || (codepoint == ' ')); | 2810 (codepoint == '\t') || (codepoint == ' ')); |
| 2803 break; | 2811 break; |
| 2804 case XML_REGEXP_NOTINITNAME: | 2812 case XML_REGEXP_NOTINITNAME: |
| 2805 neg = !neg; | 2813 neg = !neg; |
| 2806 case XML_REGEXP_INITNAME: | 2814 case XML_REGEXP_INITNAME: |
| 2807 » ret = (IS_LETTER(codepoint) || | 2815 » ret = (IS_LETTER(codepoint) || |
| 2808 (codepoint == '_') || (codepoint == ':')); | 2816 (codepoint == '_') || (codepoint == ':')); |
| 2809 break; | 2817 break; |
| 2810 case XML_REGEXP_NOTNAMECHAR: | 2818 case XML_REGEXP_NOTNAMECHAR: |
| 2811 neg = !neg; | 2819 neg = !neg; |
| 2812 case XML_REGEXP_NAMECHAR: | 2820 case XML_REGEXP_NAMECHAR: |
| 2813 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) || | 2821 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) || |
| 2814 (codepoint == '.') || (codepoint == '-') || | 2822 (codepoint == '.') || (codepoint == '-') || |
| 2815 (codepoint == '_') || (codepoint == ':') || | 2823 (codepoint == '_') || (codepoint == ':') || |
| 2816 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint)); | 2824 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint)); |
| 2817 break; | 2825 break; |
| (...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3045 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, | 3053 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, |
| 3046 (const xmlChar *)atom->valuep); | 3054 (const xmlChar *)atom->valuep); |
| 3047 if (atom->neg) | 3055 if (atom->neg) |
| 3048 ret = !ret; | 3056 ret = !ret; |
| 3049 break; | 3057 break; |
| 3050 } | 3058 } |
| 3051 return(ret); | 3059 return(ret); |
| 3052 } | 3060 } |
| 3053 | 3061 |
| 3054 /************************************************************************ | 3062 /************************************************************************ |
| 3055 * » » » » » » » » » * | 3063 *» » » » » » » » » * |
| 3056 * Saving and restoring state of an execution context * | 3064 * Saving and restoring state of an execution context * |
| 3057 * » » » » » » » » » * | 3065 *» » » » » » » » » * |
| 3058 ************************************************************************/ | 3066 ************************************************************************/ |
| 3059 | 3067 |
| 3060 #ifdef DEBUG_REGEXP_EXEC | 3068 #ifdef DEBUG_REGEXP_EXEC |
| 3061 static void | 3069 static void |
| 3062 xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { | 3070 xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { |
| 3063 printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); | 3071 printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); |
| 3064 if (exec->inputStack != NULL) { | 3072 if (exec->inputStack != NULL) { |
| 3065 int i; | 3073 int i; |
| 3066 printf(": "); | 3074 printf(": "); |
| 3067 for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) | 3075 for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3147 exec->nbRollbacks--; | 3155 exec->nbRollbacks--; |
| 3148 exec->state = exec->rollbacks[exec->nbRollbacks].state; | 3156 exec->state = exec->rollbacks[exec->nbRollbacks].state; |
| 3149 exec->index = exec->rollbacks[exec->nbRollbacks].index; | 3157 exec->index = exec->rollbacks[exec->nbRollbacks].index; |
| 3150 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; | 3158 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; |
| 3151 if (exec->comp->nbCounters > 0) { | 3159 if (exec->comp->nbCounters > 0) { |
| 3152 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { | 3160 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { |
| 3153 fprintf(stderr, "exec save: allocation failed"); | 3161 fprintf(stderr, "exec save: allocation failed"); |
| 3154 exec->status = -6; | 3162 exec->status = -6; |
| 3155 return; | 3163 return; |
| 3156 } | 3164 } |
| 3157 » memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, | 3165 » if (exec->counts) { |
| 3166 » memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, |
| 3158 exec->comp->nbCounters * sizeof(int)); | 3167 exec->comp->nbCounters * sizeof(int)); |
| 3168 } |
| 3159 } | 3169 } |
| 3160 | 3170 |
| 3161 #ifdef DEBUG_REGEXP_EXEC | 3171 #ifdef DEBUG_REGEXP_EXEC |
| 3162 printf("restored "); | 3172 printf("restored "); |
| 3163 xmlFARegDebugExec(exec); | 3173 xmlFARegDebugExec(exec); |
| 3164 #endif | 3174 #endif |
| 3165 } | 3175 } |
| 3166 | 3176 |
| 3167 /************************************************************************ | 3177 /************************************************************************ |
| 3168 * » » » » » » » » » * | 3178 *» » » » » » » » » * |
| 3169 * Verifier, running an input against a compiled regexp * | 3179 * Verifier, running an input against a compiled regexp * |
| 3170 * » » » » » » » » » * | 3180 *» » » » » » » » » * |
| 3171 ************************************************************************/ | 3181 ************************************************************************/ |
| 3172 | 3182 |
| 3173 static int | 3183 static int |
| 3174 xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { | 3184 xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { |
| 3175 xmlRegExecCtxt execval; | 3185 xmlRegExecCtxt execval; |
| 3176 xmlRegExecCtxtPtr exec = &execval; | 3186 xmlRegExecCtxtPtr exec = &execval; |
| 3177 int ret, codepoint = 0, len, deter; | 3187 int ret, codepoint = 0, len, deter; |
| 3178 | 3188 |
| 3179 exec->inputString = content; | 3189 exec->inputString = content; |
| 3180 exec->index = 0; | 3190 exec->index = 0; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 3192 exec->inputStackMax = 0; | 3202 exec->inputStackMax = 0; |
| 3193 if (comp->nbCounters > 0) { | 3203 if (comp->nbCounters > 0) { |
| 3194 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); | 3204 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); |
| 3195 if (exec->counts == NULL) { | 3205 if (exec->counts == NULL) { |
| 3196 xmlRegexpErrMemory(NULL, "running regexp"); | 3206 xmlRegexpErrMemory(NULL, "running regexp"); |
| 3197 return(-1); | 3207 return(-1); |
| 3198 } | 3208 } |
| 3199 memset(exec->counts, 0, comp->nbCounters * sizeof(int)); | 3209 memset(exec->counts, 0, comp->nbCounters * sizeof(int)); |
| 3200 } else | 3210 } else |
| 3201 exec->counts = NULL; | 3211 exec->counts = NULL; |
| 3202 while ((exec->status == 0) && | 3212 while ((exec->status == 0) && (exec->state != NULL) && |
| 3203 ((exec->inputString[exec->index] != 0) || | 3213 ((exec->inputString[exec->index] != 0) || |
| 3204 ((exec->state != NULL) && | 3214 ((exec->state != NULL) && |
| 3205 (exec->state->type != XML_REGEXP_FINAL_STATE)))) { | 3215 (exec->state->type != XML_REGEXP_FINAL_STATE)))) { |
| 3206 xmlRegTransPtr trans; | 3216 xmlRegTransPtr trans; |
| 3207 xmlRegAtomPtr atom; | 3217 xmlRegAtomPtr atom; |
| 3208 | 3218 |
| 3209 /* | 3219 /* |
| 3210 * If end of input on non-terminal state, rollback, however we may | 3220 * If end of input on non-terminal state, rollback, however we may |
| 3211 * still have epsilon like transition for counted transitions | 3221 * still have epsilon like transition for counted transitions |
| 3212 * on counters, in that case don't break too early. Additionally, | 3222 * on counters, in that case don't break too early. Additionally, |
| (...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3446 if (exec->rollbacks != NULL) { | 3456 if (exec->rollbacks != NULL) { |
| 3447 if (exec->counts != NULL) { | 3457 if (exec->counts != NULL) { |
| 3448 int i; | 3458 int i; |
| 3449 | 3459 |
| 3450 for (i = 0;i < exec->maxRollbacks;i++) | 3460 for (i = 0;i < exec->maxRollbacks;i++) |
| 3451 if (exec->rollbacks[i].counts != NULL) | 3461 if (exec->rollbacks[i].counts != NULL) |
| 3452 xmlFree(exec->rollbacks[i].counts); | 3462 xmlFree(exec->rollbacks[i].counts); |
| 3453 } | 3463 } |
| 3454 xmlFree(exec->rollbacks); | 3464 xmlFree(exec->rollbacks); |
| 3455 } | 3465 } |
| 3466 if (exec->state == NULL) |
| 3467 return(-1); |
| 3456 if (exec->counts != NULL) | 3468 if (exec->counts != NULL) |
| 3457 xmlFree(exec->counts); | 3469 xmlFree(exec->counts); |
| 3458 if (exec->status == 0) | 3470 if (exec->status == 0) |
| 3459 return(1); | 3471 return(1); |
| 3460 if (exec->status == -1) { | 3472 if (exec->status == -1) { |
| 3461 if (exec->nbPush > MAX_PUSH) | 3473 if (exec->nbPush > MAX_PUSH) |
| 3462 return(-1); | 3474 return(-1); |
| 3463 return(0); | 3475 return(0); |
| 3464 } | 3476 } |
| 3465 return(exec->status); | 3477 return(exec->status); |
| 3466 } | 3478 } |
| 3467 | 3479 |
| 3468 /************************************************************************ | 3480 /************************************************************************ |
| 3469 * » » » » » » » » » * | 3481 *» » » » » » » » » * |
| 3470 * Progressive interface to the verifier one atom at a time * | 3482 * Progressive interface to the verifier one atom at a time * |
| 3471 * » » » » » » » » » * | 3483 *» » » » » » » » » * |
| 3472 ************************************************************************/ | 3484 ************************************************************************/ |
| 3473 #ifdef DEBUG_ERR | 3485 #ifdef DEBUG_ERR |
| 3474 static void testerr(xmlRegExecCtxtPtr exec); | 3486 static void testerr(xmlRegExecCtxtPtr exec); |
| 3475 #endif | 3487 #endif |
| 3476 | 3488 |
| 3477 /** | 3489 /** |
| 3478 * xmlRegNewExecCtxt: | 3490 * xmlRegNewExecCtxt: |
| 3479 * @comp: a precompiled regular expression | 3491 * @comp: a precompiled regular expression |
| 3480 * @callback: a callback function used for handling progresses in the | 3492 * @callback: a callback function used for handling progresses in the |
| 3481 * automata matching phase | 3493 * automata matching phase |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3578 } | 3590 } |
| 3579 | 3591 |
| 3580 static void | 3592 static void |
| 3581 xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, | 3593 xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, |
| 3582 void *data) { | 3594 void *data) { |
| 3583 #ifdef DEBUG_PUSH | 3595 #ifdef DEBUG_PUSH |
| 3584 printf("saving value: %d:%s\n", exec->inputStackNr, value); | 3596 printf("saving value: %d:%s\n", exec->inputStackNr, value); |
| 3585 #endif | 3597 #endif |
| 3586 if (exec->inputStackMax == 0) { | 3598 if (exec->inputStackMax == 0) { |
| 3587 exec->inputStackMax = 4; | 3599 exec->inputStackMax = 4; |
| 3588 » exec->inputStack = (xmlRegInputTokenPtr) | 3600 » exec->inputStack = (xmlRegInputTokenPtr) |
| 3589 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); | 3601 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); |
| 3590 if (exec->inputStack == NULL) { | 3602 if (exec->inputStack == NULL) { |
| 3591 xmlRegexpErrMemory(NULL, "pushing input string"); | 3603 xmlRegexpErrMemory(NULL, "pushing input string"); |
| 3592 exec->inputStackMax = 0; | 3604 exec->inputStackMax = 0; |
| 3593 return; | 3605 return; |
| 3594 } | 3606 } |
| 3595 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { | 3607 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { |
| 3596 xmlRegInputTokenPtr tmp; | 3608 xmlRegInputTokenPtr tmp; |
| 3597 | 3609 |
| 3598 exec->inputStackMax *= 2; | 3610 exec->inputStackMax *= 2; |
| 3599 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, | 3611 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, |
| 3600 exec->inputStackMax * sizeof(xmlRegInputToken)); | 3612 exec->inputStackMax * sizeof(xmlRegInputToken)); |
| 3601 if (tmp == NULL) { | 3613 if (tmp == NULL) { |
| 3602 xmlRegexpErrMemory(NULL, "pushing input string"); | 3614 xmlRegexpErrMemory(NULL, "pushing input string"); |
| 3603 exec->inputStackMax /= 2; | 3615 exec->inputStackMax /= 2; |
| 3604 return; | 3616 return; |
| 3605 } | 3617 } |
| 3606 exec->inputStack = tmp; | 3618 exec->inputStack = tmp; |
| 3607 } | 3619 } |
| 3608 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); | 3620 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); |
| 3609 exec->inputStack[exec->inputStackNr].data = data; | 3621 exec->inputStack[exec->inputStackNr].data = data; |
| 3610 exec->inputStackNr++; | 3622 exec->inputStackNr++; |
| 3611 exec->inputStack[exec->inputStackNr].value = NULL; | 3623 exec->inputStack[exec->inputStackNr].value = NULL; |
| 3612 exec->inputStack[exec->inputStackNr].data = NULL; | 3624 exec->inputStack[exec->inputStackNr].data = NULL; |
| 3613 } | 3625 } |
| 3614 | 3626 |
| 3615 /** | 3627 /** |
| 3616 * xmlRegStrEqualWildcard: | 3628 * xmlRegStrEqualWildcard: |
| 3617 * @expStr: the string to be evaluated | 3629 * @expStr: the string to be evaluated |
| 3618 * @valStr: the validation string | 3630 * @valStr: the validation string |
| 3619 * | 3631 * |
| 3620 * Checks if both strings are equal or have the same content. "*" | 3632 * Checks if both strings are equal or have the same content. "*" |
| 3621 * can be used as a wildcard in @valStr; "|" is used as a seperator of | 3633 * can be used as a wildcard in @valStr; "|" is used as a seperator of |
| 3622 * substrings in both @expStr and @valStr. | 3634 * substrings in both @expStr and @valStr. |
| 3623 * | 3635 * |
| 3624 * Returns 1 if the comparison is satisfied and the number of substrings | 3636 * Returns 1 if the comparison is satisfied and the number of substrings |
| 3625 * is equal, 0 otherwise. | 3637 * is equal, 0 otherwise. |
| 3626 */ | 3638 */ |
| 3627 | 3639 |
| 3628 static int | 3640 static int |
| 3629 xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { | 3641 xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { |
| 3630 if (expStr == valStr) return(1); | 3642 if (expStr == valStr) return(1); |
| 3631 if (expStr == NULL) return(0); | 3643 if (expStr == NULL) return(0); |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3677 static int | 3689 static int |
| 3678 xmlRegCompactPushString(xmlRegExecCtxtPtr exec, | 3690 xmlRegCompactPushString(xmlRegExecCtxtPtr exec, |
| 3679 xmlRegexpPtr comp, | 3691 xmlRegexpPtr comp, |
| 3680 const xmlChar *value, | 3692 const xmlChar *value, |
| 3681 void *data) { | 3693 void *data) { |
| 3682 int state = exec->index; | 3694 int state = exec->index; |
| 3683 int i, target; | 3695 int i, target; |
| 3684 | 3696 |
| 3685 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) | 3697 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) |
| 3686 return(-1); | 3698 return(-1); |
| 3687 | 3699 |
| 3688 if (value == NULL) { | 3700 if (value == NULL) { |
| 3689 /* | 3701 /* |
| 3690 * are we at a final state ? | 3702 * are we at a final state ? |
| 3691 */ | 3703 */ |
| 3692 if (comp->compact[state * (comp->nbstrings + 1)] == | 3704 if (comp->compact[state * (comp->nbstrings + 1)] == |
| 3693 XML_REGEXP_FINAL_STATE) | 3705 XML_REGEXP_FINAL_STATE) |
| 3694 return(1); | 3706 return(1); |
| 3695 return(0); | 3707 return(0); |
| 3696 } | 3708 } |
| 3697 | 3709 |
| 3698 #ifdef DEBUG_PUSH | 3710 #ifdef DEBUG_PUSH |
| 3699 printf("value pushed: %s\n", value); | 3711 printf("value pushed: %s\n", value); |
| 3700 #endif | 3712 #endif |
| 3701 | 3713 |
| 3702 /* | 3714 /* |
| 3703 * Examine all outside transitions from current state | 3715 * Examine all outside transitions from current state |
| 3704 */ | 3716 */ |
| 3705 for (i = 0;i < comp->nbstrings;i++) { | 3717 for (i = 0;i < comp->nbstrings;i++) { |
| 3706 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; | 3718 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; |
| 3707 if ((target > 0) && (target <= comp->nbstates)) { | 3719 if ((target > 0) && (target <= comp->nbstates)) { |
| 3708 » target--; /* to avoid 0 */ | 3720 » target--; /* to avoid 0 */ |
| 3709 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) { | 3721 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) { |
| 3710 » » exec->index = target;» » | 3722 » » exec->index = target; |
| 3711 if ((exec->callback != NULL) && (comp->transdata != NULL)) { | 3723 if ((exec->callback != NULL) && (comp->transdata != NULL)) { |
| 3712 exec->callback(exec->data, value, | 3724 exec->callback(exec->data, value, |
| 3713 comp->transdata[state * comp->nbstrings + i], data); | 3725 comp->transdata[state * comp->nbstrings + i], data); |
| 3714 } | 3726 } |
| 3715 #ifdef DEBUG_PUSH | 3727 #ifdef DEBUG_PUSH |
| 3716 printf("entering state %d\n", target); | 3728 printf("entering state %d\n", target); |
| 3717 #endif | 3729 #endif |
| 3718 if (comp->compact[target * (comp->nbstrings + 1)] == | 3730 if (comp->compact[target * (comp->nbstrings + 1)] == |
| 3719 XML_REGEXP_SINK_STATE) | 3731 XML_REGEXP_SINK_STATE) |
| 3720 goto error; | 3732 goto error; |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3834 */ | 3846 */ |
| 3835 if ((value == NULL) && (final)) { | 3847 if ((value == NULL) && (final)) { |
| 3836 ret = 1; | 3848 ret = 1; |
| 3837 } else if (value != NULL) { | 3849 } else if (value != NULL) { |
| 3838 for (i = 0;i < exec->state->nbTrans;i++) { | 3850 for (i = 0;i < exec->state->nbTrans;i++) { |
| 3839 t = &exec->state->trans[i]; | 3851 t = &exec->state->trans[i]; |
| 3840 if ((t->counter < 0) || (t == trans)) | 3852 if ((t->counter < 0) || (t == trans)) |
| 3841 continue; | 3853 continue; |
| 3842 counter = &exec->comp->counters[t->counter]; | 3854 counter = &exec->comp->counters[t->counter]; |
| 3843 count = exec->counts[t->counter]; | 3855 count = exec->counts[t->counter]; |
| 3844 » » » if ((count < counter->max) && | 3856 » » » if ((count < counter->max) && |
| 3845 (t->atom != NULL) && | 3857 (t->atom != NULL) && |
| 3846 (xmlStrEqual(value, t->atom->valuep))) { | 3858 (xmlStrEqual(value, t->atom->valuep))) { |
| 3847 ret = 0; | 3859 ret = 0; |
| 3848 break; | 3860 break; |
| 3849 } | 3861 } |
| 3850 if ((count >= counter->min) && | 3862 if ((count >= counter->min) && |
| 3851 (count < counter->max) && | 3863 (count < counter->max) && |
| 3852 (t->atom != NULL) && | 3864 (t->atom != NULL) && |
| 3853 (xmlStrEqual(value, t->atom->valuep))) { | 3865 (xmlStrEqual(value, t->atom->valuep))) { |
| 3854 ret = 1; | 3866 ret = 1; |
| (...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4074 exec->errState = exec->state; | 4086 exec->errState = exec->state; |
| 4075 memcpy(exec->errCounts, exec->counts, | 4087 memcpy(exec->errCounts, exec->counts, |
| 4076 exec->comp->nbCounters * sizeof(int)); | 4088 exec->comp->nbCounters * sizeof(int)); |
| 4077 } | 4089 } |
| 4078 | 4090 |
| 4079 /* | 4091 /* |
| 4080 * Failed to find a way out | 4092 * Failed to find a way out |
| 4081 */ | 4093 */ |
| 4082 exec->determinist = 0; | 4094 exec->determinist = 0; |
| 4083 xmlFARegExecRollBack(exec); | 4095 xmlFARegExecRollBack(exec); |
| 4084 » if (exec->status == 0) { | 4096 » if ((exec->inputStack != NULL ) && (exec->status == 0)) { |
| 4085 value = exec->inputStack[exec->index].value; | 4097 value = exec->inputStack[exec->index].value; |
| 4086 data = exec->inputStack[exec->index].data; | 4098 data = exec->inputStack[exec->index].data; |
| 4087 #ifdef DEBUG_PUSH | 4099 #ifdef DEBUG_PUSH |
| 4088 printf("value loaded: %s\n", value); | 4100 printf("value loaded: %s\n", value); |
| 4089 #endif | 4101 #endif |
| 4090 } | 4102 } |
| 4091 } | 4103 } |
| 4092 continue; | 4104 continue; |
| 4093 progress: | 4105 progress: |
| 4094 progress = 1; | 4106 progress = 1; |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4192 * | 4204 * |
| 4193 * Returns: 0 in case of success or -1 in case of error. | 4205 * Returns: 0 in case of success or -1 in case of error. |
| 4194 */ | 4206 */ |
| 4195 static int | 4207 static int |
| 4196 xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err, | 4208 xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err, |
| 4197 int *nbval, int *nbneg, | 4209 int *nbval, int *nbneg, |
| 4198 xmlChar **values, int *terminal) { | 4210 xmlChar **values, int *terminal) { |
| 4199 int maxval; | 4211 int maxval; |
| 4200 int nb = 0; | 4212 int nb = 0; |
| 4201 | 4213 |
| 4202 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || | 4214 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || |
| 4203 (values == NULL) || (*nbval <= 0)) | 4215 (values == NULL) || (*nbval <= 0)) |
| 4204 return(-1); | 4216 return(-1); |
| 4205 | 4217 |
| 4206 maxval = *nbval; | 4218 maxval = *nbval; |
| 4207 *nbval = 0; | 4219 *nbval = 0; |
| 4208 *nbneg = 0; | 4220 *nbneg = 0; |
| 4209 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { | 4221 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { |
| 4210 xmlRegexpPtr comp; | 4222 xmlRegexpPtr comp; |
| 4211 int target, i, state; | 4223 int target, i, state; |
| 4212 | 4224 |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4289 if (exec->comp != NULL) | 4301 if (exec->comp != NULL) |
| 4290 counter = &exec->comp->counters[trans->counter]; | 4302 counter = &exec->comp->counters[trans->counter]; |
| 4291 if ((counter == NULL) || (count < counter->max)) { | 4303 if ((counter == NULL) || (count < counter->max)) { |
| 4292 if (atom->neg) | 4304 if (atom->neg) |
| 4293 values[nb++] = (xmlChar *) atom->valuep2; | 4305 values[nb++] = (xmlChar *) atom->valuep2; |
| 4294 else | 4306 else |
| 4295 values[nb++] = (xmlChar *) atom->valuep; | 4307 values[nb++] = (xmlChar *) atom->valuep; |
| 4296 (*nbval)++; | 4308 (*nbval)++; |
| 4297 } | 4309 } |
| 4298 } else { | 4310 } else { |
| 4299 if ((exec->comp->states[trans->to] != NULL) && | 4311 if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NU
LL) && |
| 4300 (exec->comp->states[trans->to]->type != | 4312 (exec->comp->states[trans->to]->type != |
| 4301 XML_REGEXP_SINK_STATE)) { | 4313 XML_REGEXP_SINK_STATE)) { |
| 4302 if (atom->neg) | 4314 if (atom->neg) |
| 4303 values[nb++] = (xmlChar *) atom->valuep2; | 4315 values[nb++] = (xmlChar *) atom->valuep2; |
| 4304 else | 4316 else |
| 4305 values[nb++] = (xmlChar *) atom->valuep; | 4317 values[nb++] = (xmlChar *) atom->valuep; |
| 4306 (*nbval)++; | 4318 (*nbval)++; |
| 4307 } | 4319 } |
| 4308 » } | 4320 » } |
| 4309 } | 4321 } |
| 4310 for (transno = 0; | 4322 for (transno = 0; |
| 4311 (transno < state->nbTrans) && (nb < maxval); | 4323 (transno < state->nbTrans) && (nb < maxval); |
| 4312 transno++) { | 4324 transno++) { |
| 4313 trans = &state->trans[transno]; | 4325 trans = &state->trans[transno]; |
| 4314 if (trans->to < 0) | 4326 if (trans->to < 0) |
| 4315 continue; | 4327 continue; |
| 4316 atom = trans->atom; | 4328 atom = trans->atom; |
| 4317 if ((atom == NULL) || (atom->valuep == NULL)) | 4329 if ((atom == NULL) || (atom->valuep == NULL)) |
| 4318 continue; | 4330 continue; |
| 4319 if (trans->count == REGEXP_ALL_LAX_COUNTER) { | 4331 if (trans->count == REGEXP_ALL_LAX_COUNTER) { |
| 4320 continue; | 4332 continue; |
| 4321 } else if (trans->count == REGEXP_ALL_COUNTER) { | 4333 } else if (trans->count == REGEXP_ALL_COUNTER) { |
| 4322 continue; | 4334 continue; |
| 4323 } else if (trans->counter >= 0) { | 4335 } else if (trans->counter >= 0) { |
| 4324 continue; | 4336 continue; |
| 4325 } else { | 4337 } else { |
| 4326 if ((exec->comp->states[trans->to] != NULL) && | 4338 if ((exec->comp->states[trans->to] != NULL) && |
| 4327 (exec->comp->states[trans->to]->type == | 4339 (exec->comp->states[trans->to]->type == |
| 4328 XML_REGEXP_SINK_STATE)) { | 4340 XML_REGEXP_SINK_STATE)) { |
| 4329 if (atom->neg) | 4341 if (atom->neg) |
| 4330 values[nb++] = (xmlChar *) atom->valuep2; | 4342 values[nb++] = (xmlChar *) atom->valuep2; |
| 4331 else | 4343 else |
| 4332 values[nb++] = (xmlChar *) atom->valuep; | 4344 values[nb++] = (xmlChar *) atom->valuep; |
| 4333 (*nbneg)++; | 4345 (*nbneg)++; |
| 4334 } | 4346 } |
| 4335 » } | 4347 » } |
| 4336 } | 4348 } |
| 4337 } | 4349 } |
| 4338 return(0); | 4350 return(0); |
| 4339 } | 4351 } |
| 4340 | 4352 |
| 4341 /** | 4353 /** |
| 4342 * xmlRegExecNextValues: | 4354 * xmlRegExecNextValues: |
| 4343 * @exec: a regexp execution context | 4355 * @exec: a regexp execution context |
| 4344 * @nbval: pointer to the number of accepted values IN/OUT | 4356 * @nbval: pointer to the number of accepted values IN/OUT |
| 4345 * @nbneg: return number of negative transitions | 4357 * @nbneg: return number of negative transitions |
| (...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4556 */ | 4568 */ |
| 4557 exec->determinist = 0; | 4569 exec->determinist = 0; |
| 4558 xmlFARegExecRollBack(exec); | 4570 xmlFARegExecRollBack(exec); |
| 4559 } | 4571 } |
| 4560 progress: | 4572 progress: |
| 4561 continue; | 4573 continue; |
| 4562 } | 4574 } |
| 4563 } | 4575 } |
| 4564 #endif | 4576 #endif |
| 4565 /************************************************************************ | 4577 /************************************************************************ |
| 4566 * » » » » » » » » » * | 4578 *» » » » » » » » » * |
| 4567 * Parser for the Schemas Datatype Regular Expressions * | 4579 * Parser for the Schemas Datatype Regular Expressions * |
| 4568 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * | 4580 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * |
| 4569 * » » » » » » » » » * | 4581 *» » » » » » » » » * |
| 4570 ************************************************************************/ | 4582 ************************************************************************/ |
| 4571 | 4583 |
| 4572 /** | 4584 /** |
| 4573 * xmlFAIsChar: | 4585 * xmlFAIsChar: |
| 4574 * @ctxt: a regexp parser context | 4586 * @ctxt: a regexp parser context |
| 4575 * | 4587 * |
| 4576 * [10] Char ::= [^.\?*+()|#x5B#x5D] | 4588 * [10] Char ::= [^.\?*+()|#x5B#x5D] |
| 4577 */ | 4589 */ |
| 4578 static int | 4590 static int |
| 4579 xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { | 4591 xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { |
| 4580 int cur; | 4592 int cur; |
| 4581 int len; | 4593 int len; |
| 4582 | 4594 |
| 4583 cur = CUR_SCHAR(ctxt->cur, len); | 4595 cur = CUR_SCHAR(ctxt->cur, len); |
| 4584 if ((cur == '.') || (cur == '\\') || (cur == '?') || | 4596 if ((cur == '.') || (cur == '\\') || (cur == '?') || |
| 4585 (cur == '*') || (cur == '+') || (cur == '(') || | 4597 (cur == '*') || (cur == '+') || (cur == '(') || |
| 4586 (cur == ')') || (cur == '|') || (cur == 0x5B) || | 4598 (cur == ')') || (cur == '|') || (cur == 0x5B) || |
| 4587 (cur == 0x5D) || (cur == 0)) | 4599 (cur == 0x5D) || (cur == 0)) |
| 4588 return(-1); | 4600 return(-1); |
| 4589 return(cur); | 4601 return(cur); |
| 4590 } | 4602 } |
| 4591 | 4603 |
| 4592 /** | 4604 /** |
| 4593 * xmlFAParseCharProp: | 4605 * xmlFAParseCharProp: |
| 4594 * @ctxt: a regexp parser context | 4606 * @ctxt: a regexp parser context |
| 4595 * | 4607 * |
| 4596 * [27] charProp ::= IsCategory | IsBlock | 4608 * [27] charProp ::= IsCategory | IsBlock |
| 4597 * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | | 4609 * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | |
| 4598 * Separators | Symbols | Others | 4610 * Separators | Symbols | Others |
| 4599 * [29] Letters ::= 'L' [ultmo]? | 4611 * [29] Letters ::= 'L' [ultmo]? |
| 4600 * [30] Marks ::= 'M' [nce]? | 4612 * [30] Marks ::= 'M' [nce]? |
| 4601 * [31] Numbers ::= 'N' [dlo]? | 4613 * [31] Numbers ::= 'N' [dlo]? |
| 4602 * [32] Punctuation ::= 'P' [cdseifo]? | 4614 * [32] Punctuation ::= 'P' [cdseifo]? |
| 4603 * [33] Separators ::= 'Z' [slp]? | 4615 * [33] Separators ::= 'Z' [slp]? |
| 4604 * [34] Symbols ::= 'S' [mcko]? | 4616 * [34] Symbols ::= 'S' [mcko]? |
| 4605 * [35] Others ::= 'C' [cfon]? | 4617 * [35] Others ::= 'C' [cfon]? |
| 4606 * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ | 4618 * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ |
| 4607 */ | 4619 */ |
| 4608 static void | 4620 static void |
| 4609 xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { | 4621 xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { |
| 4610 int cur; | 4622 int cur; |
| 4611 xmlRegAtomType type = (xmlRegAtomType) 0; | 4623 xmlRegAtomType type = (xmlRegAtomType) 0; |
| 4612 xmlChar *blockName = NULL; | 4624 xmlChar *blockName = NULL; |
| 4613 | 4625 |
| 4614 cur = CUR; | 4626 cur = CUR; |
| 4615 if (cur == 'L') { | 4627 if (cur == 'L') { |
| 4616 NEXT; | 4628 NEXT; |
| 4617 cur = CUR; | 4629 cur = CUR; |
| 4618 if (cur == 'u') { | 4630 if (cur == 'u') { |
| 4619 NEXT; | 4631 NEXT; |
| 4620 type = XML_REGEXP_LETTER_UPPERCASE; | 4632 type = XML_REGEXP_LETTER_UPPERCASE; |
| 4621 } else if (cur == 'l') { | 4633 } else if (cur == 'l') { |
| 4622 NEXT; | 4634 NEXT; |
| 4623 type = XML_REGEXP_LETTER_LOWERCASE; | 4635 type = XML_REGEXP_LETTER_LOWERCASE; |
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4775 const xmlChar *start; | 4787 const xmlChar *start; |
| 4776 NEXT; | 4788 NEXT; |
| 4777 cur = CUR; | 4789 cur = CUR; |
| 4778 if (cur != 's') { | 4790 if (cur != 's') { |
| 4779 ERROR("IsXXXX expected"); | 4791 ERROR("IsXXXX expected"); |
| 4780 return; | 4792 return; |
| 4781 } | 4793 } |
| 4782 NEXT; | 4794 NEXT; |
| 4783 start = ctxt->cur; | 4795 start = ctxt->cur; |
| 4784 cur = CUR; | 4796 cur = CUR; |
| 4785 » if (((cur >= 'a') && (cur <= 'z')) || | 4797 » if (((cur >= 'a') && (cur <= 'z')) || |
| 4786 » ((cur >= 'A') && (cur <= 'Z')) || | 4798 » ((cur >= 'A') && (cur <= 'Z')) || |
| 4787 » ((cur >= '0') && (cur <= '9')) || | 4799 » ((cur >= '0') && (cur <= '9')) || |
| 4788 (cur == 0x2D)) { | 4800 (cur == 0x2D)) { |
| 4789 NEXT; | 4801 NEXT; |
| 4790 cur = CUR; | 4802 cur = CUR; |
| 4791 » while (((cur >= 'a') && (cur <= 'z')) || | 4803 » while (((cur >= 'a') && (cur <= 'z')) || |
| 4792 » » ((cur >= 'A') && (cur <= 'Z')) || | 4804 » » ((cur >= 'A') && (cur <= 'Z')) || |
| 4793 » » ((cur >= '0') && (cur <= '9')) || | 4805 » » ((cur >= '0') && (cur <= '9')) || |
| 4794 (cur == 0x2D)) { | 4806 (cur == 0x2D)) { |
| 4795 NEXT; | 4807 NEXT; |
| 4796 cur = CUR; | 4808 cur = CUR; |
| 4797 } | 4809 } |
| 4798 } | 4810 } |
| 4799 type = XML_REGEXP_BLOCK_NAME; | 4811 type = XML_REGEXP_BLOCK_NAME; |
| 4800 blockName = xmlStrndup(start, ctxt->cur - start); | 4812 blockName = xmlStrndup(start, ctxt->cur - start); |
| 4801 } else { | 4813 } else { |
| 4802 ERROR("Unknown char property"); | 4814 ERROR("Unknown char property"); |
| 4803 return; | 4815 return; |
| 4804 } | 4816 } |
| 4805 if (ctxt->atom == NULL) { | 4817 if (ctxt->atom == NULL) { |
| 4806 ctxt->atom = xmlRegNewAtom(ctxt, type); | 4818 ctxt->atom = xmlRegNewAtom(ctxt, type); |
| 4807 if (ctxt->atom != NULL) | 4819 if (ctxt->atom != NULL) |
| 4808 ctxt->atom->valuep = blockName; | 4820 ctxt->atom->valuep = blockName; |
| 4809 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { | 4821 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { |
| 4810 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, | 4822 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, |
| 4811 type, 0, 0, blockName); | 4823 type, 0, 0, blockName); |
| 4812 } | 4824 } |
| 4813 } | 4825 } |
| 4814 | 4826 |
| 4815 /** | 4827 /** |
| 4816 * xmlFAParseCharClassEsc: | 4828 * xmlFAParseCharClassEsc: |
| 4817 * @ctxt: a regexp parser context | 4829 * @ctxt: a regexp parser context |
| 4818 * | 4830 * |
| 4819 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) | 4831 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) |
| 4820 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] | 4832 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] |
| 4821 * [25] catEsc ::= '\p{' charProp '}' | 4833 * [25] catEsc ::= '\p{' charProp '}' |
| 4822 * [26] complEsc ::= '\P{' charProp '}' | 4834 * [26] complEsc ::= '\P{' charProp '}' |
| 4823 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) | 4835 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) |
| 4824 */ | 4836 */ |
| 4825 static void | 4837 static void |
| 4826 xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { | 4838 xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { |
| 4827 int cur; | 4839 int cur; |
| 4828 | 4840 |
| 4829 if (CUR == '.') { | 4841 if (CUR == '.') { |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4906 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, | 4918 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, |
| 4907 XML_REGEXP_CHARVAL, cur, cur, NULL); | 4919 XML_REGEXP_CHARVAL, cur, cur, NULL); |
| 4908 } | 4920 } |
| 4909 NEXT; | 4921 NEXT; |
| 4910 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || | 4922 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || |
| 4911 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || | 4923 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || |
| 4912 (cur == 'w') || (cur == 'W')) { | 4924 (cur == 'w') || (cur == 'W')) { |
| 4913 xmlRegAtomType type = XML_REGEXP_ANYSPACE; | 4925 xmlRegAtomType type = XML_REGEXP_ANYSPACE; |
| 4914 | 4926 |
| 4915 switch (cur) { | 4927 switch (cur) { |
| 4916 » case 's': | 4928 » case 's': |
| 4917 type = XML_REGEXP_ANYSPACE; | 4929 type = XML_REGEXP_ANYSPACE; |
| 4918 break; | 4930 break; |
| 4919 » case 'S': | 4931 » case 'S': |
| 4920 type = XML_REGEXP_NOTSPACE; | 4932 type = XML_REGEXP_NOTSPACE; |
| 4921 break; | 4933 break; |
| 4922 » case 'i': | 4934 » case 'i': |
| 4923 type = XML_REGEXP_INITNAME; | 4935 type = XML_REGEXP_INITNAME; |
| 4924 break; | 4936 break; |
| 4925 » case 'I': | 4937 » case 'I': |
| 4926 type = XML_REGEXP_NOTINITNAME; | 4938 type = XML_REGEXP_NOTINITNAME; |
| 4927 break; | 4939 break; |
| 4928 » case 'c': | 4940 » case 'c': |
| 4929 type = XML_REGEXP_NAMECHAR; | 4941 type = XML_REGEXP_NAMECHAR; |
| 4930 break; | 4942 break; |
| 4931 » case 'C': | 4943 » case 'C': |
| 4932 type = XML_REGEXP_NOTNAMECHAR; | 4944 type = XML_REGEXP_NOTNAMECHAR; |
| 4933 break; | 4945 break; |
| 4934 » case 'd': | 4946 » case 'd': |
| 4935 type = XML_REGEXP_DECIMAL; | 4947 type = XML_REGEXP_DECIMAL; |
| 4936 break; | 4948 break; |
| 4937 » case 'D': | 4949 » case 'D': |
| 4938 type = XML_REGEXP_NOTDECIMAL; | 4950 type = XML_REGEXP_NOTDECIMAL; |
| 4939 break; | 4951 break; |
| 4940 » case 'w': | 4952 » case 'w': |
| 4941 type = XML_REGEXP_REALCHAR; | 4953 type = XML_REGEXP_REALCHAR; |
| 4942 break; | 4954 break; |
| 4943 » case 'W': | 4955 » case 'W': |
| 4944 type = XML_REGEXP_NOTREALCHAR; | 4956 type = XML_REGEXP_NOTREALCHAR; |
| 4945 break; | 4957 break; |
| 4946 } | 4958 } |
| 4947 NEXT; | 4959 NEXT; |
| 4948 if (ctxt->atom == NULL) { | 4960 if (ctxt->atom == NULL) { |
| 4949 ctxt->atom = xmlRegNewAtom(ctxt, type); | 4961 ctxt->atom = xmlRegNewAtom(ctxt, type); |
| 4950 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { | 4962 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { |
| 4951 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, | 4963 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, |
| 4952 type, 0, 0, NULL); | 4964 type, 0, 0, NULL); |
| 4953 } | 4965 } |
| 4954 } else { | 4966 } else { |
| 4955 ERROR("Wrong escape sequence, misuse of character '\\'"); | 4967 ERROR("Wrong escape sequence, misuse of character '\\'"); |
| 4956 } | 4968 } |
| 4957 } | 4969 } |
| 4958 | 4970 |
| 4959 /** | 4971 /** |
| 4960 * xmlFAParseCharRange: | 4972 * xmlFAParseCharRange: |
| 4961 * @ctxt: a regexp parser context | 4973 * @ctxt: a regexp parser context |
| 4962 * | 4974 * |
| 4963 * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash | 4975 * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash |
| 4964 * [18] seRange ::= charOrEsc '-' charOrEsc | 4976 * [18] seRange ::= charOrEsc '-' charOrEsc |
| 4965 * [20] charOrEsc ::= XmlChar | SingleCharEsc | 4977 * [20] charOrEsc ::= XmlChar | SingleCharEsc |
| 4966 * [21] XmlChar ::= [^\#x2D#x5B#x5D] | 4978 * [21] XmlChar ::= [^\#x2D#x5B#x5D] |
| 4967 * [22] XmlCharIncDash ::= [^\#x5B#x5D] | 4979 * [22] XmlCharIncDash ::= [^\#x5B#x5D] |
| 4968 */ | 4980 */ |
| 4969 static void | 4981 static void |
| 4970 xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { | 4982 xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { |
| 4971 int cur, len; | 4983 int cur, len; |
| 4972 int start = -1; | 4984 int start = -1; |
| 4973 int end = -1; | 4985 int end = -1; |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5068 } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && | 5080 } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && |
| 5069 (CUR != 0) && (ctxt->error == 0)); | 5081 (CUR != 0) && (ctxt->error == 0)); |
| 5070 } | 5082 } |
| 5071 | 5083 |
| 5072 /** | 5084 /** |
| 5073 * xmlFAParseCharGroup: | 5085 * xmlFAParseCharGroup: |
| 5074 * @ctxt: a regexp parser context | 5086 * @ctxt: a regexp parser context |
| 5075 * | 5087 * |
| 5076 * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub | 5088 * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub |
| 5077 * [15] negCharGroup ::= '^' posCharGroup | 5089 * [15] negCharGroup ::= '^' posCharGroup |
| 5078 * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr | 5090 * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr |
| 5079 * [12] charClassExpr ::= '[' charGroup ']' | 5091 * [12] charClassExpr ::= '[' charGroup ']' |
| 5080 */ | 5092 */ |
| 5081 static void | 5093 static void |
| 5082 xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { | 5094 xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { |
| 5083 int n = ctxt->neg; | 5095 int n = ctxt->neg; |
| 5084 while ((CUR != ']') && (ctxt->error == 0)) { | 5096 while ((CUR != ']') && (ctxt->error == 0)) { |
| 5085 if (CUR == '^') { | 5097 if (CUR == '^') { |
| 5086 int neg = ctxt->neg; | 5098 int neg = ctxt->neg; |
| 5087 | 5099 |
| 5088 NEXT; | 5100 NEXT; |
| (...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5316 * [2] branch ::= piece* | 5328 * [2] branch ::= piece* |
| 5317 */ | 5329 */ |
| 5318 static int | 5330 static int |
| 5319 xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) { | 5331 xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) { |
| 5320 xmlRegStatePtr previous; | 5332 xmlRegStatePtr previous; |
| 5321 int ret; | 5333 int ret; |
| 5322 | 5334 |
| 5323 previous = ctxt->state; | 5335 previous = ctxt->state; |
| 5324 ret = xmlFAParsePiece(ctxt); | 5336 ret = xmlFAParsePiece(ctxt); |
| 5325 if (ret != 0) { | 5337 if (ret != 0) { |
| 5326 » if (xmlFAGenerateTransitions(ctxt, previous, | 5338 » if (xmlFAGenerateTransitions(ctxt, previous, |
| 5327 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) | 5339 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) |
| 5328 return(-1); | 5340 return(-1); |
| 5329 previous = ctxt->state; | 5341 previous = ctxt->state; |
| 5330 ctxt->atom = NULL; | 5342 ctxt->atom = NULL; |
| 5331 } | 5343 } |
| 5332 while ((ret != 0) && (ctxt->error == 0)) { | 5344 while ((ret != 0) && (ctxt->error == 0)) { |
| 5333 ret = xmlFAParsePiece(ctxt); | 5345 ret = xmlFAParsePiece(ctxt); |
| 5334 if (ret != 0) { | 5346 if (ret != 0) { |
| 5335 » if (xmlFAGenerateTransitions(ctxt, previous, | 5347 » if (xmlFAGenerateTransitions(ctxt, previous, |
| 5336 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) | 5348 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) |
| 5337 return(-1); | 5349 return(-1); |
| 5338 previous = ctxt->state; | 5350 previous = ctxt->state; |
| 5339 ctxt->atom = NULL; | 5351 ctxt->atom = NULL; |
| 5340 } | 5352 } |
| 5341 } | 5353 } |
| 5342 return(0); | 5354 return(0); |
| 5343 } | 5355 } |
| 5344 | 5356 |
| 5345 /** | 5357 /** |
| (...skipping 17 matching lines...) Expand all Loading... |
| 5363 #endif | 5375 #endif |
| 5364 ctxt->state->type = XML_REGEXP_FINAL_STATE; | 5376 ctxt->state->type = XML_REGEXP_FINAL_STATE; |
| 5365 } | 5377 } |
| 5366 if (CUR != '|') { | 5378 if (CUR != '|') { |
| 5367 ctxt->end = ctxt->state; | 5379 ctxt->end = ctxt->state; |
| 5368 return; | 5380 return; |
| 5369 } | 5381 } |
| 5370 end = ctxt->state; | 5382 end = ctxt->state; |
| 5371 while ((CUR == '|') && (ctxt->error == 0)) { | 5383 while ((CUR == '|') && (ctxt->error == 0)) { |
| 5372 NEXT; | 5384 NEXT; |
| 5385 if (CUR == 0) { |
| 5386 ERROR("expecting a branch after |") |
| 5387 return; |
| 5388 } |
| 5373 ctxt->state = start; | 5389 ctxt->state = start; |
| 5374 ctxt->end = NULL; | 5390 ctxt->end = NULL; |
| 5375 xmlFAParseBranch(ctxt, end); | 5391 xmlFAParseBranch(ctxt, end); |
| 5376 } | 5392 } |
| 5377 if (!top) { | 5393 if (!top) { |
| 5378 ctxt->state = end; | 5394 ctxt->state = end; |
| 5379 ctxt->end = end; | 5395 ctxt->end = end; |
| 5380 } | 5396 } |
| 5381 } | 5397 } |
| 5382 | 5398 |
| 5383 /************************************************************************ | 5399 /************************************************************************ |
| 5384 * » » » » » » » » » * | 5400 *» » » » » » » » » * |
| 5385 * » » » The basic API» » » » » * | 5401 *» » » The basic API» » » » » * |
| 5386 * » » » » » » » » » * | 5402 *» » » » » » » » » * |
| 5387 ************************************************************************/ | 5403 ************************************************************************/ |
| 5388 | 5404 |
| 5389 /** | 5405 /** |
| 5390 * xmlRegexpPrint: | 5406 * xmlRegexpPrint: |
| 5391 * @output: the file for the output debug | 5407 * @output: the file for the output debug |
| 5392 * @regexp: the compiled regexp | 5408 * @regexp: the compiled regexp |
| 5393 * | 5409 * |
| 5394 * Print the content of the compiled regular expression | 5410 * Print the content of the compiled regular expression |
| 5395 */ | 5411 */ |
| 5396 void | 5412 void |
| (...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5563 for (i = 0; i < regexp->nbstrings;i++) | 5579 for (i = 0; i < regexp->nbstrings;i++) |
| 5564 xmlFree(regexp->stringMap[i]); | 5580 xmlFree(regexp->stringMap[i]); |
| 5565 xmlFree(regexp->stringMap); | 5581 xmlFree(regexp->stringMap); |
| 5566 } | 5582 } |
| 5567 | 5583 |
| 5568 xmlFree(regexp); | 5584 xmlFree(regexp); |
| 5569 } | 5585 } |
| 5570 | 5586 |
| 5571 #ifdef LIBXML_AUTOMATA_ENABLED | 5587 #ifdef LIBXML_AUTOMATA_ENABLED |
| 5572 /************************************************************************ | 5588 /************************************************************************ |
| 5573 * » » » » » » » » » * | 5589 *» » » » » » » » » * |
| 5574 * » » » The Automata interface» » » » * | 5590 *» » » The Automata interface» » » » * |
| 5575 * » » » » » » » » » * | 5591 *» » » » » » » » » * |
| 5576 ************************************************************************/ | 5592 ************************************************************************/ |
| 5577 | 5593 |
| 5578 /** | 5594 /** |
| 5579 * xmlNewAutomata: | 5595 * xmlNewAutomata: |
| 5580 * | 5596 * |
| 5581 * Create a new automata | 5597 * Create a new automata |
| 5582 * | 5598 * |
| 5583 * Returns the new object or NULL in case of failure | 5599 * Returns the new object or NULL in case of failure |
| 5584 */ | 5600 */ |
| 5585 xmlAutomataPtr | 5601 xmlAutomataPtr |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5686 xmlAutomataStatePtr to, const xmlChar *token, | 5702 xmlAutomataStatePtr to, const xmlChar *token, |
| 5687 void *data) { | 5703 void *data) { |
| 5688 xmlRegAtomPtr atom; | 5704 xmlRegAtomPtr atom; |
| 5689 | 5705 |
| 5690 if ((am == NULL) || (from == NULL) || (token == NULL)) | 5706 if ((am == NULL) || (from == NULL) || (token == NULL)) |
| 5691 return(NULL); | 5707 return(NULL); |
| 5692 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); | 5708 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); |
| 5693 if (atom == NULL) | 5709 if (atom == NULL) |
| 5694 return(NULL); | 5710 return(NULL); |
| 5695 atom->data = data; | 5711 atom->data = data; |
| 5696 if (atom == NULL) | |
| 5697 return(NULL); | |
| 5698 atom->valuep = xmlStrdup(token); | 5712 atom->valuep = xmlStrdup(token); |
| 5699 | 5713 |
| 5700 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { | 5714 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { |
| 5701 xmlRegFreeAtom(atom); | 5715 xmlRegFreeAtom(atom); |
| 5702 return(NULL); | 5716 return(NULL); |
| 5703 } | 5717 } |
| 5704 if (to == NULL) | 5718 if (to == NULL) |
| 5705 return(am->state); | 5719 return(am->state); |
| 5706 return(to); | 5720 return(to); |
| 5707 } | 5721 } |
| (...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5836 * @from: the starting point of the transition | 5850 * @from: the starting point of the transition |
| 5837 * @to: the target point of the transition or NULL | 5851 * @to: the target point of the transition or NULL |
| 5838 * @token: the input string associated to that transition | 5852 * @token: the input string associated to that transition |
| 5839 * @token2: the second input string associated to that transition | 5853 * @token2: the second input string associated to that transition |
| 5840 * @min: the minimum successive occurences of token | 5854 * @min: the minimum successive occurences of token |
| 5841 * @max: the maximum successive occurences of token | 5855 * @max: the maximum successive occurences of token |
| 5842 * @data: data associated to the transition | 5856 * @data: data associated to the transition |
| 5843 * | 5857 * |
| 5844 * If @to is NULL, this creates first a new target state in the automata | 5858 * If @to is NULL, this creates first a new target state in the automata |
| 5845 * and then adds a transition from the @from state to the target state | 5859 * and then adds a transition from the @from state to the target state |
| 5846 * activated by a succession of input of value @token and @token2 and | 5860 * activated by a succession of input of value @token and @token2 and |
| 5847 * whose number is between @min and @max | 5861 * whose number is between @min and @max |
| 5848 * | 5862 * |
| 5849 * Returns the target state or NULL in case of error | 5863 * Returns the target state or NULL in case of error |
| 5850 */ | 5864 */ |
| 5851 xmlAutomataStatePtr | 5865 xmlAutomataStatePtr |
| 5852 xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, | 5866 xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, |
| 5853 xmlAutomataStatePtr to, const xmlChar *token, | 5867 xmlAutomataStatePtr to, const xmlChar *token, |
| 5854 const xmlChar *token2, | 5868 const xmlChar *token2, |
| 5855 int min, int max, void *data) { | 5869 int min, int max, void *data) { |
| 5856 xmlRegAtomPtr atom; | 5870 xmlRegAtomPtr atom; |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5990 * @from: the starting point of the transition | 6004 * @from: the starting point of the transition |
| 5991 * @to: the target point of the transition or NULL | 6005 * @to: the target point of the transition or NULL |
| 5992 * @token: the input string associated to that transition | 6006 * @token: the input string associated to that transition |
| 5993 * @token2: the second input string associated to that transition | 6007 * @token2: the second input string associated to that transition |
| 5994 * @min: the minimum successive occurences of token | 6008 * @min: the minimum successive occurences of token |
| 5995 * @max: the maximum successive occurences of token | 6009 * @max: the maximum successive occurences of token |
| 5996 * @data: data associated to the transition | 6010 * @data: data associated to the transition |
| 5997 * | 6011 * |
| 5998 * If @to is NULL, this creates first a new target state in the automata | 6012 * If @to is NULL, this creates first a new target state in the automata |
| 5999 * and then adds a transition from the @from state to the target state | 6013 * and then adds a transition from the @from state to the target state |
| 6000 * activated by a succession of input of value @token and @token2 and whose | 6014 * activated by a succession of input of value @token and @token2 and whose |
| 6001 * number is between @min and @max, moreover that transition can only be | 6015 * number is between @min and @max, moreover that transition can only be |
| 6002 * crossed once. | 6016 * crossed once. |
| 6003 * | 6017 * |
| 6004 * Returns the target state or NULL in case of error | 6018 * Returns the target state or NULL in case of error |
| 6005 */ | 6019 */ |
| 6006 xmlAutomataStatePtr | 6020 xmlAutomataStatePtr |
| 6007 xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, | 6021 xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, |
| 6008 xmlAutomataStatePtr to, const xmlChar *token, | 6022 xmlAutomataStatePtr to, const xmlChar *token, |
| 6009 const xmlChar *token2, | 6023 const xmlChar *token2, |
| 6010 int min, int max, void *data) { | 6024 int min, int max, void *data) { |
| 6011 xmlRegAtomPtr atom; | 6025 xmlRegAtomPtr atom; |
| (...skipping 21 matching lines...) Expand all Loading... |
| 6033 if (str == NULL) { | 6047 if (str == NULL) { |
| 6034 xmlRegFreeAtom(atom); | 6048 xmlRegFreeAtom(atom); |
| 6035 return(NULL); | 6049 return(NULL); |
| 6036 } | 6050 } |
| 6037 memcpy(&str[0], token, lenp); | 6051 memcpy(&str[0], token, lenp); |
| 6038 str[lenp] = '|'; | 6052 str[lenp] = '|'; |
| 6039 memcpy(&str[lenp + 1], token2, lenn); | 6053 memcpy(&str[lenp + 1], token2, lenn); |
| 6040 str[lenn + lenp + 1] = 0; | 6054 str[lenn + lenp + 1] = 0; |
| 6041 | 6055 |
| 6042 atom->valuep = str; | 6056 atom->valuep = str; |
| 6043 } | 6057 } |
| 6044 atom->data = data; | 6058 atom->data = data; |
| 6045 atom->quant = XML_REGEXP_QUANT_ONCEONLY; | 6059 atom->quant = XML_REGEXP_QUANT_ONCEONLY; |
| 6046 atom->min = min; | 6060 atom->min = min; |
| 6047 atom->max = max; | 6061 atom->max = max; |
| 6048 /* | 6062 /* |
| 6049 * associate a counter to the transition. | 6063 * associate a counter to the transition. |
| 6050 */ | 6064 */ |
| 6051 counter = xmlRegGetCounter(am); | 6065 counter = xmlRegGetCounter(am); |
| 6052 am->counters[counter].min = 1; | 6066 am->counters[counter].min = 1; |
| 6053 am->counters[counter].max = 1; | 6067 am->counters[counter].max = 1; |
| 6054 | 6068 |
| 6055 /* xmlFAGenerateTransitions(am, from, to, atom); */ | 6069 /* xmlFAGenerateTransitions(am, from, to, atom); */ |
| 6056 if (to == NULL) { | 6070 if (to == NULL) { |
| 6057 to = xmlRegNewState(am); | 6071 to = xmlRegNewState(am); |
| 6058 xmlRegStatePush(am, to); | 6072 xmlRegStatePush(am, to); |
| 6059 } | 6073 } |
| 6060 xmlRegStateAddTrans(am, from, atom, to, counter, -1); | 6074 xmlRegStateAddTrans(am, from, atom, to, counter, -1); |
| 6061 xmlRegAtomPush(am, atom); | 6075 xmlRegAtomPush(am, atom); |
| 6062 am->state = to; | 6076 am->state = to; |
| 6063 return(to); | 6077 return(to); |
| 6064 } | 6078 } |
| 6065 | 6079 |
| 6066 | 6080 |
| 6067 | 6081 |
| 6068 /** | 6082 /** |
| 6069 * xmlAutomataNewOnceTrans: | 6083 * xmlAutomataNewOnceTrans: |
| 6070 * @am: an automata | 6084 * @am: an automata |
| 6071 * @from: the starting point of the transition | 6085 * @from: the starting point of the transition |
| 6072 * @to: the target point of the transition or NULL | 6086 * @to: the target point of the transition or NULL |
| 6073 * @token: the input string associated to that transition | 6087 * @token: the input string associated to that transition |
| 6074 * @min: the minimum successive occurences of token | 6088 * @min: the minimum successive occurences of token |
| 6075 * @max: the maximum successive occurences of token | 6089 * @max: the maximum successive occurences of token |
| 6076 * @data: data associated to the transition | 6090 * @data: data associated to the transition |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6125 /** | 6139 /** |
| 6126 * xmlAutomataNewState: | 6140 * xmlAutomataNewState: |
| 6127 * @am: an automata | 6141 * @am: an automata |
| 6128 * | 6142 * |
| 6129 * Create a new disconnected state in the automata | 6143 * Create a new disconnected state in the automata |
| 6130 * | 6144 * |
| 6131 * Returns the new state or NULL in case of error | 6145 * Returns the new state or NULL in case of error |
| 6132 */ | 6146 */ |
| 6133 xmlAutomataStatePtr | 6147 xmlAutomataStatePtr |
| 6134 xmlAutomataNewState(xmlAutomataPtr am) { | 6148 xmlAutomataNewState(xmlAutomataPtr am) { |
| 6135 xmlAutomataStatePtr to; | 6149 xmlAutomataStatePtr to; |
| 6136 | 6150 |
| 6137 if (am == NULL) | 6151 if (am == NULL) |
| 6138 return(NULL); | 6152 return(NULL); |
| 6139 to = xmlRegNewState(am); | 6153 to = xmlRegNewState(am); |
| 6140 xmlRegStatePush(am, to); | 6154 xmlRegStatePush(am, to); |
| 6141 return(to); | 6155 return(to); |
| 6142 } | 6156 } |
| 6143 | 6157 |
| 6144 /** | 6158 /** |
| 6145 * xmlAutomataNewEpsilon: | 6159 * xmlAutomataNewEpsilon: |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6192 /** | 6206 /** |
| 6193 * xmlAutomataNewCounter: | 6207 * xmlAutomataNewCounter: |
| 6194 * @am: an automata | 6208 * @am: an automata |
| 6195 * @min: the minimal value on the counter | 6209 * @min: the minimal value on the counter |
| 6196 * @max: the maximal value on the counter | 6210 * @max: the maximal value on the counter |
| 6197 * | 6211 * |
| 6198 * Create a new counter | 6212 * Create a new counter |
| 6199 * | 6213 * |
| 6200 * Returns the counter number or -1 in case of error | 6214 * Returns the counter number or -1 in case of error |
| 6201 */ | 6215 */ |
| 6202 int» » | 6216 int |
| 6203 xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { | 6217 xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { |
| 6204 int ret; | 6218 int ret; |
| 6205 | 6219 |
| 6206 if (am == NULL) | 6220 if (am == NULL) |
| 6207 return(-1); | 6221 return(-1); |
| 6208 | 6222 |
| 6209 ret = xmlRegGetCounter(am); | 6223 ret = xmlRegGetCounter(am); |
| 6210 if (ret < 0) | 6224 if (ret < 0) |
| 6211 return(-1); | 6225 return(-1); |
| 6212 am->counters[ret].min = min; | 6226 am->counters[ret].min = min; |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6264 | 6278 |
| 6265 /** | 6279 /** |
| 6266 * xmlAutomataCompile: | 6280 * xmlAutomataCompile: |
| 6267 * @am: an automata | 6281 * @am: an automata |
| 6268 * | 6282 * |
| 6269 * Compile the automata into a Reg Exp ready for being executed. | 6283 * Compile the automata into a Reg Exp ready for being executed. |
| 6270 * The automata should be free after this point. | 6284 * The automata should be free after this point. |
| 6271 * | 6285 * |
| 6272 * Returns the compiled regexp or NULL in case of error | 6286 * Returns the compiled regexp or NULL in case of error |
| 6273 */ | 6287 */ |
| 6274 xmlRegexpPtr | 6288 xmlRegexpPtr |
| 6275 xmlAutomataCompile(xmlAutomataPtr am) { | 6289 xmlAutomataCompile(xmlAutomataPtr am) { |
| 6276 xmlRegexpPtr ret; | 6290 xmlRegexpPtr ret; |
| 6277 | 6291 |
| 6278 if ((am == NULL) || (am->error != 0)) return(NULL); | 6292 if ((am == NULL) || (am->error != 0)) return(NULL); |
| 6279 xmlFAEliminateEpsilonTransitions(am); | 6293 xmlFAEliminateEpsilonTransitions(am); |
| 6280 /* xmlFAComputesDeterminism(am); */ | 6294 /* xmlFAComputesDeterminism(am); */ |
| 6281 ret = xmlRegEpxFromParse(am); | 6295 ret = xmlRegEpxFromParse(am); |
| 6282 | 6296 |
| 6283 return(ret); | 6297 return(ret); |
| 6284 } | 6298 } |
| 6285 | 6299 |
| 6286 /** | 6300 /** |
| 6287 * xmlAutomataIsDeterminist: | 6301 * xmlAutomataIsDeterminist: |
| 6288 * @am: an automata | 6302 * @am: an automata |
| 6289 * | 6303 * |
| 6290 * Checks if an automata is determinist. | 6304 * Checks if an automata is determinist. |
| 6291 * | 6305 * |
| 6292 * Returns 1 if true, 0 if not, and -1 in case of error | 6306 * Returns 1 if true, 0 if not, and -1 in case of error |
| 6293 */ | 6307 */ |
| 6294 int | 6308 int |
| 6295 xmlAutomataIsDeterminist(xmlAutomataPtr am) { | 6309 xmlAutomataIsDeterminist(xmlAutomataPtr am) { |
| 6296 int ret; | 6310 int ret; |
| 6297 | 6311 |
| 6298 if (am == NULL) | 6312 if (am == NULL) |
| 6299 return(-1); | 6313 return(-1); |
| 6300 | 6314 |
| 6301 ret = xmlFAComputesDeterminism(am); | 6315 ret = xmlFAComputesDeterminism(am); |
| 6302 return(ret); | 6316 return(ret); |
| 6303 } | 6317 } |
| 6304 #endif /* LIBXML_AUTOMATA_ENABLED */ | 6318 #endif /* LIBXML_AUTOMATA_ENABLED */ |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6337 * | 6351 * |
| 6338 * Returns the context or NULL in case of error | 6352 * Returns the context or NULL in case of error |
| 6339 */ | 6353 */ |
| 6340 xmlExpCtxtPtr | 6354 xmlExpCtxtPtr |
| 6341 xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) { | 6355 xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) { |
| 6342 xmlExpCtxtPtr ret; | 6356 xmlExpCtxtPtr ret; |
| 6343 int size = 256; | 6357 int size = 256; |
| 6344 | 6358 |
| 6345 if (maxNodes <= 4096) | 6359 if (maxNodes <= 4096) |
| 6346 maxNodes = 4096; | 6360 maxNodes = 4096; |
| 6347 | 6361 |
| 6348 ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt)); | 6362 ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt)); |
| 6349 if (ret == NULL) | 6363 if (ret == NULL) |
| 6350 return(NULL); | 6364 return(NULL); |
| 6351 memset(ret, 0, sizeof(xmlExpCtxt)); | 6365 memset(ret, 0, sizeof(xmlExpCtxt)); |
| 6352 ret->size = size; | 6366 ret->size = size; |
| 6353 ret->nbElems = 0; | 6367 ret->nbElems = 0; |
| 6354 ret->maxNodes = maxNodes; | 6368 ret->maxNodes = maxNodes; |
| 6355 ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr)); | 6369 ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr)); |
| 6356 if (ret->table == NULL) { | 6370 if (ret->table == NULL) { |
| 6357 xmlFree(ret); | 6371 xmlFree(ret); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6391 /************************************************************************ | 6405 /************************************************************************ |
| 6392 * * | 6406 * * |
| 6393 * Structure associated to an expression node * | 6407 * Structure associated to an expression node * |
| 6394 * * | 6408 * * |
| 6395 ************************************************************************/ | 6409 ************************************************************************/ |
| 6396 #define MAX_NODES 10000 | 6410 #define MAX_NODES 10000 |
| 6397 | 6411 |
| 6398 /* #define DEBUG_DERIV */ | 6412 /* #define DEBUG_DERIV */ |
| 6399 | 6413 |
| 6400 /* | 6414 /* |
| 6401 * TODO: | 6415 * TODO: |
| 6402 * - Wildcards | 6416 * - Wildcards |
| 6403 * - public API for creation | 6417 * - public API for creation |
| 6404 * | 6418 * |
| 6405 * Started | 6419 * Started |
| 6406 * - regression testing | 6420 * - regression testing |
| 6407 * | 6421 * |
| 6408 * Done | 6422 * Done |
| 6409 * - split into module and test tool | 6423 * - split into module and test tool |
| 6410 * - memleaks | 6424 * - memleaks |
| 6411 */ | 6425 */ |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6459 * * | 6473 * * |
| 6460 ************************************************************************/ | 6474 ************************************************************************/ |
| 6461 /* | 6475 /* |
| 6462 * xmlExpHashNameComputeKey: | 6476 * xmlExpHashNameComputeKey: |
| 6463 * Calculate the hash key for a token | 6477 * Calculate the hash key for a token |
| 6464 */ | 6478 */ |
| 6465 static unsigned short | 6479 static unsigned short |
| 6466 xmlExpHashNameComputeKey(const xmlChar *name) { | 6480 xmlExpHashNameComputeKey(const xmlChar *name) { |
| 6467 unsigned short value = 0L; | 6481 unsigned short value = 0L; |
| 6468 char ch; | 6482 char ch; |
| 6469 | 6483 |
| 6470 if (name != NULL) { | 6484 if (name != NULL) { |
| 6471 value += 30 * (*name); | 6485 value += 30 * (*name); |
| 6472 while ((ch = *name++) != 0) { | 6486 while ((ch = *name++) != 0) { |
| 6473 » value = value ^ ((value << 5) + (value >> 3) + (unsigned short)ch); | 6487 » value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch); |
| 6474 } | 6488 } |
| 6475 } | 6489 } |
| 6476 return (value); | 6490 return (value); |
| 6477 } | 6491 } |
| 6478 | 6492 |
| 6479 /* | 6493 /* |
| 6480 * xmlExpHashComputeKey: | 6494 * xmlExpHashComputeKey: |
| 6481 * Calculate the hash key for a compound expression | 6495 * Calculate the hash key for a compound expression |
| 6482 */ | 6496 */ |
| 6483 static unsigned short | 6497 static unsigned short |
| 6484 xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left, | 6498 xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left, |
| 6485 xmlExpNodePtr right) { | 6499 xmlExpNodePtr right) { |
| 6486 unsigned long value; | 6500 unsigned long value; |
| 6487 unsigned short ret; | 6501 unsigned short ret; |
| 6488 | 6502 |
| 6489 switch (type) { | 6503 switch (type) { |
| 6490 case XML_EXP_SEQ: | 6504 case XML_EXP_SEQ: |
| 6491 value = left->key; | 6505 value = left->key; |
| 6492 value += right->key; | 6506 value += right->key; |
| 6493 value *= 3; | 6507 value *= 3; |
| 6494 ret = (unsigned short) value; | 6508 ret = (unsigned short) value; |
| 6495 break; | 6509 break; |
| 6496 case XML_EXP_OR: | 6510 case XML_EXP_OR: |
| 6497 value = left->key; | 6511 value = left->key; |
| 6498 value += right->key; | 6512 value += right->key; |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6619 tmp = left->exp_right; | 6633 tmp = left->exp_right; |
| 6620 left->exp_right = left->exp_left; | 6634 left->exp_right = left->exp_left; |
| 6621 left->exp_left = tmp; | 6635 left->exp_left = tmp; |
| 6622 } | 6636 } |
| 6623 left->exp_right->ref++; | 6637 left->exp_right->ref++; |
| 6624 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right, | 6638 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right, |
| 6625 NULL, 0, 0); | 6639 NULL, 0, 0); |
| 6626 left->exp_left->ref++; | 6640 left->exp_left->ref++; |
| 6627 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp, | 6641 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp, |
| 6628 NULL, 0, 0); | 6642 NULL, 0, 0); |
| 6629 » | 6643 |
| 6630 xmlExpFree(ctxt, left); | 6644 xmlExpFree(ctxt, left); |
| 6631 return(tmp); | 6645 return(tmp); |
| 6632 } | 6646 } |
| 6633 if (right->type == XML_EXP_OR) { | 6647 if (right->type == XML_EXP_OR) { |
| 6634 /* Ordering in the tree */ | 6648 /* Ordering in the tree */ |
| 6635 /* C | (A | B) -> A | (B | C) */ | 6649 /* C | (A | B) -> A | (B | C) */ |
| 6636 if (left->key > right->exp_right->key) { | 6650 if (left->key > right->exp_right->key) { |
| 6637 xmlExpNodePtr tmp; | 6651 xmlExpNodePtr tmp; |
| 6638 right->exp_right->ref++; | 6652 right->exp_right->ref++; |
| 6639 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right, | 6653 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right, |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6676 return(right); | 6690 return(right); |
| 6677 } | 6691 } |
| 6678 /* Empty reduction rules */ | 6692 /* Empty reduction rules */ |
| 6679 if (right->type == XML_EXP_EMPTY) { | 6693 if (right->type == XML_EXP_EMPTY) { |
| 6680 return(left); | 6694 return(left); |
| 6681 } | 6695 } |
| 6682 if (left->type == XML_EXP_EMPTY) { | 6696 if (left->type == XML_EXP_EMPTY) { |
| 6683 return(right); | 6697 return(right); |
| 6684 } | 6698 } |
| 6685 kbase = xmlExpHashComputeKey(type, left, right); | 6699 kbase = xmlExpHashComputeKey(type, left, right); |
| 6686 } else | 6700 } else |
| 6687 return(NULL); | 6701 return(NULL); |
| 6688 | 6702 |
| 6689 key = kbase % ctxt->size; | 6703 key = kbase % ctxt->size; |
| 6690 if (ctxt->table[key] != NULL) { | 6704 if (ctxt->table[key] != NULL) { |
| 6691 for (insert = ctxt->table[key]; insert != NULL; | 6705 for (insert = ctxt->table[key]; insert != NULL; |
| 6692 insert = insert->next) { | 6706 insert = insert->next) { |
| 6693 if ((insert->key == kbase) && | 6707 if ((insert->key == kbase) && |
| 6694 (insert->type == type)) { | 6708 (insert->type == type)) { |
| 6695 if (type == XML_EXP_ATOM) { | 6709 if (type == XML_EXP_ATOM) { |
| 6696 if (name == insert->exp_str) { | 6710 if (name == insert->exp_str) { |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6817 void | 6831 void |
| 6818 xmlExpRef(xmlExpNodePtr exp) { | 6832 xmlExpRef(xmlExpNodePtr exp) { |
| 6819 if (exp != NULL) | 6833 if (exp != NULL) |
| 6820 exp->ref++; | 6834 exp->ref++; |
| 6821 } | 6835 } |
| 6822 | 6836 |
| 6823 /** | 6837 /** |
| 6824 * xmlExpNewAtom: | 6838 * xmlExpNewAtom: |
| 6825 * @ctxt: the expression context | 6839 * @ctxt: the expression context |
| 6826 * @name: the atom name | 6840 * @name: the atom name |
| 6827 * @len: the atom name lenght in byte (or -1); | 6841 * @len: the atom name length in byte (or -1); |
| 6828 * | 6842 * |
| 6829 * Get the atom associated to this name from that context | 6843 * Get the atom associated to this name from that context |
| 6830 * | 6844 * |
| 6831 * Returns the node or NULL in case of error | 6845 * Returns the node or NULL in case of error |
| 6832 */ | 6846 */ |
| 6833 xmlExpNodePtr | 6847 xmlExpNodePtr |
| 6834 xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) { | 6848 xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) { |
| 6835 if ((ctxt == NULL) || (name == NULL)) | 6849 if ((ctxt == NULL) || (name == NULL)) |
| 6836 return(NULL); | 6850 return(NULL); |
| 6837 name = xmlDictLookup(ctxt->dict, name, len); | 6851 name = xmlDictLookup(ctxt->dict, name, len); |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6917 NULL, NULL, min, max)); | 6931 NULL, NULL, min, max)); |
| 6918 } | 6932 } |
| 6919 | 6933 |
| 6920 /************************************************************************ | 6934 /************************************************************************ |
| 6921 * * | 6935 * * |
| 6922 * Public API for operations on expressions * | 6936 * Public API for operations on expressions * |
| 6923 * * | 6937 * * |
| 6924 ************************************************************************/ | 6938 ************************************************************************/ |
| 6925 | 6939 |
| 6926 static int | 6940 static int |
| 6927 xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, | 6941 xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, |
| 6928 const xmlChar**list, int len, int nb) { | 6942 const xmlChar**list, int len, int nb) { |
| 6929 int tmp, tmp2; | 6943 int tmp, tmp2; |
| 6930 tail: | 6944 tail: |
| 6931 switch (exp->type) { | 6945 switch (exp->type) { |
| 6932 case XML_EXP_EMPTY: | 6946 case XML_EXP_EMPTY: |
| 6933 return(0); | 6947 return(0); |
| 6934 case XML_EXP_ATOM: | 6948 case XML_EXP_ATOM: |
| 6935 for (tmp = 0;tmp < nb;tmp++) | 6949 for (tmp = 0;tmp < nb;tmp++) |
| 6936 if (list[tmp] == exp->exp_str) | 6950 if (list[tmp] == exp->exp_str) |
| 6937 return(0); | 6951 return(0); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 6954 return(tmp + tmp2); | 6968 return(tmp + tmp2); |
| 6955 } | 6969 } |
| 6956 return(-1); | 6970 return(-1); |
| 6957 } | 6971 } |
| 6958 | 6972 |
| 6959 /** | 6973 /** |
| 6960 * xmlExpGetLanguage: | 6974 * xmlExpGetLanguage: |
| 6961 * @ctxt: the expression context | 6975 * @ctxt: the expression context |
| 6962 * @exp: the expression | 6976 * @exp: the expression |
| 6963 * @langList: where to store the tokens | 6977 * @langList: where to store the tokens |
| 6964 * @len: the allocated lenght of @list | 6978 * @len: the allocated length of @list |
| 6965 * | 6979 * |
| 6966 * Find all the strings used in @exp and store them in @list | 6980 * Find all the strings used in @exp and store them in @list |
| 6967 * | 6981 * |
| 6968 * Returns the number of unique strings found, -1 in case of errors and | 6982 * Returns the number of unique strings found, -1 in case of errors and |
| 6969 * -2 if there is more than @len strings | 6983 * -2 if there is more than @len strings |
| 6970 */ | 6984 */ |
| 6971 int | 6985 int |
| 6972 xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, | 6986 xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, |
| 6973 const xmlChar**langList, int len) { | 6987 const xmlChar**langList, int len) { |
| 6974 if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0)) | 6988 if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0)) |
| 6975 return(-1); | 6989 return(-1); |
| 6976 return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0)); | 6990 return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0)); |
| 6977 } | 6991 } |
| 6978 | 6992 |
| 6979 static int | 6993 static int |
| 6980 xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, | 6994 xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, |
| 6981 const xmlChar**list, int len, int nb) { | 6995 const xmlChar**list, int len, int nb) { |
| 6982 int tmp, tmp2; | 6996 int tmp, tmp2; |
| 6983 tail: | 6997 tail: |
| 6984 switch (exp->type) { | 6998 switch (exp->type) { |
| 6985 case XML_EXP_FORBID: | 6999 case XML_EXP_FORBID: |
| 6986 return(0); | 7000 return(0); |
| 6987 case XML_EXP_EMPTY: | 7001 case XML_EXP_EMPTY: |
| 6988 return(0); | 7002 return(0); |
| 6989 case XML_EXP_ATOM: | 7003 case XML_EXP_ATOM: |
| 6990 for (tmp = 0;tmp < nb;tmp++) | 7004 for (tmp = 0;tmp < nb;tmp++) |
| (...skipping 29 matching lines...) Expand all Loading... |
| 7020 return(tmp + tmp2); | 7034 return(tmp + tmp2); |
| 7021 } | 7035 } |
| 7022 return(-1); | 7036 return(-1); |
| 7023 } | 7037 } |
| 7024 | 7038 |
| 7025 /** | 7039 /** |
| 7026 * xmlExpGetStart: | 7040 * xmlExpGetStart: |
| 7027 * @ctxt: the expression context | 7041 * @ctxt: the expression context |
| 7028 * @exp: the expression | 7042 * @exp: the expression |
| 7029 * @tokList: where to store the tokens | 7043 * @tokList: where to store the tokens |
| 7030 * @len: the allocated lenght of @list | 7044 * @len: the allocated length of @list |
| 7031 * | 7045 * |
| 7032 * Find all the strings that appears at the start of the languages | 7046 * Find all the strings that appears at the start of the languages |
| 7033 * accepted by @exp and store them in @list. E.g. for (a, b) | c | 7047 * accepted by @exp and store them in @list. E.g. for (a, b) | c |
| 7034 * it will return the list [a, c] | 7048 * it will return the list [a, c] |
| 7035 * | 7049 * |
| 7036 * Returns the number of unique strings found, -1 in case of errors and | 7050 * Returns the number of unique strings found, -1 in case of errors and |
| 7037 * -2 if there is more than @len strings | 7051 * -2 if there is more than @len strings |
| 7038 */ | 7052 */ |
| 7039 int | 7053 int |
| 7040 xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, | 7054 xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, |
| 7041 const xmlChar**tokList, int len) { | 7055 const xmlChar**tokList, int len) { |
| 7042 if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0)) | 7056 if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0)) |
| 7043 return(-1); | 7057 return(-1); |
| 7044 return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0)); | 7058 return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0)); |
| 7045 } | 7059 } |
| 7046 | 7060 |
| 7047 /** | 7061 /** |
| 7048 * xmlExpIsNillable: | 7062 * xmlExpIsNillable: |
| 7049 * @exp: the expression | 7063 * @exp: the expression |
| 7050 * | 7064 * |
| (...skipping 676 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 7727 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0); | 7741 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0); |
| 7728 if (ret == NULL) { | 7742 if (ret == NULL) { |
| 7729 xmlFree((xmlChar **) tab); | 7743 xmlFree((xmlChar **) tab); |
| 7730 return(NULL); | 7744 return(NULL); |
| 7731 } | 7745 } |
| 7732 } | 7746 } |
| 7733 } | 7747 } |
| 7734 xmlFree((xmlChar **) tab); | 7748 xmlFree((xmlChar **) tab); |
| 7735 return(ret); | 7749 return(ret); |
| 7736 } | 7750 } |
| 7737 | 7751 |
| 7738 /** | 7752 /** |
| 7739 * xmlExpExpDerive: | 7753 * xmlExpExpDerive: |
| 7740 * @ctxt: the expressions context | 7754 * @ctxt: the expressions context |
| 7741 * @exp: the englobing expression | 7755 * @exp: the englobing expression |
| 7742 * @sub: the subexpression | 7756 * @sub: the subexpression |
| 7743 * | 7757 * |
| 7744 * Evaluates the expression resulting from @exp consuming a sub expression @sub | 7758 * Evaluates the expression resulting from @exp consuming a sub expression @sub |
| 7745 * Based on algebraic derivation and sometimes direct Brzozowski derivation | 7759 * Based on algebraic derivation and sometimes direct Brzozowski derivation |
| 7746 * it usually tatkes less than linear time and can handle expressions generating | 7760 * it usually tatkes less than linear time and can handle expressions generating |
| 7747 * infinite languages. | 7761 * infinite languages. |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 7779 * @sub: the subexpression | 7793 * @sub: the subexpression |
| 7780 * | 7794 * |
| 7781 * Check whether @exp accepts all the languages accexpted by @sub | 7795 * Check whether @exp accepts all the languages accexpted by @sub |
| 7782 * the input being a subexpression. | 7796 * the input being a subexpression. |
| 7783 * | 7797 * |
| 7784 * Returns 1 if true 0 if false and -1 in case of failure. | 7798 * Returns 1 if true 0 if false and -1 in case of failure. |
| 7785 */ | 7799 */ |
| 7786 int | 7800 int |
| 7787 xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) { | 7801 xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) { |
| 7788 xmlExpNodePtr tmp; | 7802 xmlExpNodePtr tmp; |
| 7789 | 7803 |
| 7790 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL)) | 7804 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL)) |
| 7791 return(-1); | 7805 return(-1); |
| 7792 | 7806 |
| 7793 /* | 7807 /* |
| 7794 * TODO: speedup by checking the language of sub is a subset of the | 7808 * TODO: speedup by checking the language of sub is a subset of the |
| 7795 * language of exp | 7809 * language of exp |
| 7796 */ | 7810 */ |
| 7797 /* | 7811 /* |
| 7798 * O(1) speedups | 7812 * O(1) speedups |
| 7799 */ | 7813 */ |
| (...skipping 23 matching lines...) Expand all Loading... |
| 7823 if ((tmp != NULL) && (IS_NILLABLE(tmp))) { | 7837 if ((tmp != NULL) && (IS_NILLABLE(tmp))) { |
| 7824 xmlExpFree(ctxt, tmp); | 7838 xmlExpFree(ctxt, tmp); |
| 7825 return(1); | 7839 return(1); |
| 7826 } | 7840 } |
| 7827 xmlExpFree(ctxt, tmp); | 7841 xmlExpFree(ctxt, tmp); |
| 7828 return(0); | 7842 return(0); |
| 7829 } | 7843 } |
| 7830 | 7844 |
| 7831 /************************************************************************ | 7845 /************************************************************************ |
| 7832 * * | 7846 * * |
| 7833 *» » » Parsing expression » » » » * | 7847 *» » » Parsing expression» » » » * |
| 7834 * * | 7848 * * |
| 7835 ************************************************************************/ | 7849 ************************************************************************/ |
| 7836 | 7850 |
| 7837 static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt); | 7851 static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt); |
| 7838 | 7852 |
| 7839 #undef CUR | 7853 #undef CUR |
| 7840 #define CUR (*ctxt->cur) | 7854 #define CUR (*ctxt->cur) |
| 7841 #undef NEXT | 7855 #undef NEXT |
| 7842 #define NEXT ctxt->cur++; | 7856 #define NEXT ctxt->cur++; |
| 7843 #undef IS_BLANK | 7857 #undef IS_BLANK |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 7927 } else if (CUR == '+') { | 7941 } else if (CUR == '+') { |
| 7928 NEXT | 7942 NEXT |
| 7929 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, | 7943 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, |
| 7930 1, -1); | 7944 1, -1); |
| 7931 SKIP_BLANKS | 7945 SKIP_BLANKS |
| 7932 } else if (CUR == '*') { | 7946 } else if (CUR == '*') { |
| 7933 NEXT | 7947 NEXT |
| 7934 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, | 7948 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, |
| 7935 0, -1); | 7949 0, -1); |
| 7936 SKIP_BLANKS | 7950 SKIP_BLANKS |
| 7937 } | 7951 } |
| 7938 return(ret); | 7952 return(ret); |
| 7939 } | 7953 } |
| 7940 | 7954 |
| 7941 | 7955 |
| 7942 static xmlExpNodePtr | 7956 static xmlExpNodePtr |
| 7943 xmlExpParseSeq(xmlExpCtxtPtr ctxt) { | 7957 xmlExpParseSeq(xmlExpCtxtPtr ctxt) { |
| 7944 xmlExpNodePtr ret, right; | 7958 xmlExpNodePtr ret, right; |
| 7945 | 7959 |
| 7946 ret = xmlExpParseOr(ctxt); | 7960 ret = xmlExpParseOr(ctxt); |
| 7947 SKIP_BLANKS | 7961 SKIP_BLANKS |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 8049 xmlExpDumpInt(buf, c, 0); | 8063 xmlExpDumpInt(buf, c, 0); |
| 8050 xmlBufferWriteChar(buf, " | "); | 8064 xmlBufferWriteChar(buf, " | "); |
| 8051 c = expr->exp_right; | 8065 c = expr->exp_right; |
| 8052 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) | 8066 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) |
| 8053 xmlExpDumpInt(buf, c, 1); | 8067 xmlExpDumpInt(buf, c, 1); |
| 8054 else | 8068 else |
| 8055 xmlExpDumpInt(buf, c, 0); | 8069 xmlExpDumpInt(buf, c, 0); |
| 8056 break; | 8070 break; |
| 8057 case XML_EXP_COUNT: { | 8071 case XML_EXP_COUNT: { |
| 8058 char rep[40]; | 8072 char rep[40]; |
| 8059 » | 8073 |
| 8060 c = expr->exp_left; | 8074 c = expr->exp_left; |
| 8061 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) | 8075 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) |
| 8062 xmlExpDumpInt(buf, c, 1); | 8076 xmlExpDumpInt(buf, c, 1); |
| 8063 else | 8077 else |
| 8064 xmlExpDumpInt(buf, c, 0); | 8078 xmlExpDumpInt(buf, c, 0); |
| 8065 if ((expr->exp_min == 0) && (expr->exp_max == 1)) { | 8079 if ((expr->exp_min == 0) && (expr->exp_max == 1)) { |
| 8066 rep[0] = '?'; | 8080 rep[0] = '?'; |
| 8067 rep[1] = 0; | 8081 rep[1] = 0; |
| 8068 } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) { | 8082 } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) { |
| 8069 rep[0] = '*'; | 8083 rep[0] = '*'; |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 8144 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) { | 8158 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) { |
| 8145 if (ctxt == NULL) | 8159 if (ctxt == NULL) |
| 8146 return(-1); | 8160 return(-1); |
| 8147 return(ctxt->nb_cons); | 8161 return(ctxt->nb_cons); |
| 8148 } | 8162 } |
| 8149 | 8163 |
| 8150 #endif /* LIBXML_EXPR_ENABLED */ | 8164 #endif /* LIBXML_EXPR_ENABLED */ |
| 8151 #define bottom_xmlregexp | 8165 #define bottom_xmlregexp |
| 8152 #include "elfgcchack.h" | 8166 #include "elfgcchack.h" |
| 8153 #endif /* LIBXML_REGEXP_ENABLED */ | 8167 #endif /* LIBXML_REGEXP_ENABLED */ |
| OLD | NEW |