| OLD | NEW |
| 1 /* | 1 /* |
| 2 regexec.c - TRE POSIX compatible matching functions (and more). | 2 regexec.c - TRE POSIX compatible matching functions (and more). |
| 3 | 3 |
| 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> | 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> |
| 5 All rights reserved. | 5 All rights reserved. |
| 6 | 6 |
| 7 Redistribution and use in source and binary forms, with or without | 7 Redistribution and use in source and binary forms, with or without |
| 8 modification, are permitted provided that the following conditions | 8 modification, are permitted provided that the following conditions |
| 9 are met: | 9 are met: |
| 10 | 10 |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 130 classes++; | 130 classes++; |
| 131 return 0; /* No match. */ | 131 return 0; /* No match. */ |
| 132 } | 132 } |
| 133 | 133 |
| 134 /*********************************************************************** | 134 /*********************************************************************** |
| 135 from tre-match-parallel.c | 135 from tre-match-parallel.c |
| 136 ***********************************************************************/ | 136 ***********************************************************************/ |
| 137 | 137 |
| 138 /* | 138 /* |
| 139 This algorithm searches for matches basically by reading characters | 139 This algorithm searches for matches basically by reading characters |
| 140 in the searched string one by one, starting at the beginning.» All | 140 in the searched string one by one, starting at the beginning. All |
| 141 matching paths in the TNFA are traversed in parallel.» When two or | 141 matching paths in the TNFA are traversed in parallel. When two or |
| 142 more paths reach the same state, exactly one is chosen according to | 142 more paths reach the same state, exactly one is chosen according to |
| 143 tag ordering rules; if returning submatches is not required it does | 143 tag ordering rules; if returning submatches is not required it does |
| 144 not matter which path is chosen. | 144 not matter which path is chosen. |
| 145 | 145 |
| 146 The worst case time required for finding the leftmost and longest | 146 The worst case time required for finding the leftmost and longest |
| 147 match, or determining that there is no match, is always linearly | 147 match, or determining that there is no match, is always linearly |
| 148 dependent on the length of the text being searched. | 148 dependent on the length of the text being searched. |
| 149 | 149 |
| 150 This algorithm cannot handle TNFAs with back referencing nodes. | 150 This algorithm cannot handle TNFAs with back referencing nodes. |
| 151 See `tre-match-backtrack.c'. | 151 See `tre-match-backtrack.c'. |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 193 | 193 |
| 194 #ifdef TRE_MBSTATE | 194 #ifdef TRE_MBSTATE |
| 195 memset(&mbstate, '\0', sizeof(mbstate)); | 195 memset(&mbstate, '\0', sizeof(mbstate)); |
| 196 #endif /* TRE_MBSTATE */ | 196 #endif /* TRE_MBSTATE */ |
| 197 | 197 |
| 198 if (!match_tags) | 198 if (!match_tags) |
| 199 num_tags = 0; | 199 num_tags = 0; |
| 200 else | 200 else |
| 201 num_tags = tnfa->num_tags; | 201 num_tags = tnfa->num_tags; |
| 202 | 202 |
| 203 /* Allocate memory for temporary data required for matching.» This needs to | 203 /* Allocate memory for temporary data required for matching. This needs to |
| 204 be done for every matching operation to be thread safe. This allocates | 204 be done for every matching operation to be thread safe. This allocates |
| 205 everything in a single large block from the stack frame using alloca() | 205 everything in a single large block from the stack frame using alloca() |
| 206 or with malloc() if alloca is unavailable. */ | 206 or with malloc() if alloca is unavailable. */ |
| 207 { | 207 { |
| 208 int tbytes, rbytes, pbytes, xbytes, total_bytes; | 208 int tbytes, rbytes, pbytes, xbytes, total_bytes; |
| 209 char* tmp_buf; | 209 char* tmp_buf; |
| 210 /* Compute the length of the block we need. */ | 210 /* Compute the length of the block we need. */ |
| 211 tbytes = sizeof(*tmp_tags) * num_tags; | 211 tbytes = sizeof(*tmp_tags) * num_tags; |
| 212 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); | 212 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); |
| 213 pbytes = sizeof(*reach_pos) * tnfa->num_states; | 213 pbytes = sizeof(*reach_pos) * tnfa->num_states; |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 416 from tre-match-backtrack.c | 416 from tre-match-backtrack.c |
| 417 ***********************************************************************/ | 417 ***********************************************************************/ |
| 418 | 418 |
| 419 /* | 419 /* |
| 420 This matcher is for regexps that use back referencing. Regexp matching | 420 This matcher is for regexps that use back referencing. Regexp matching |
| 421 with back referencing is an NP-complete problem on the number of back | 421 with back referencing is an NP-complete problem on the number of back |
| 422 references. The easiest way to match them is to use a backtracking | 422 references. The easiest way to match them is to use a backtracking |
| 423 routine which basically goes through all possible paths in the TNFA | 423 routine which basically goes through all possible paths in the TNFA |
| 424 and chooses the one which results in the best (leftmost and longest) | 424 and chooses the one which results in the best (leftmost and longest) |
| 425 match. This can be spectacularly expensive and may run out of stack | 425 match. This can be spectacularly expensive and may run out of stack |
| 426 space, but there really is no better known generic algorithm.» Quoting | 426 space, but there really is no better known generic algorithm. Quoting |
| 427 Henry Spencer from comp.compilers: | 427 Henry Spencer from comp.compilers: |
| 428 <URL: http://compilers.iecc.com/comparch/article/93-03-102> | 428 <URL: http://compilers.iecc.com/comparch/article/93-03-102> |
| 429 | 429 |
| 430 POSIX.2 REs require longest match, which is really exciting to | 430 POSIX.2 REs require longest match, which is really exciting to |
| 431 implement since the obsolete ("basic") variant also includes | 431 implement since the obsolete ("basic") variant also includes |
| 432 \<digit>. I haven't found a better way of tackling this than doing | 432 \<digit>. I haven't found a better way of tackling this than doing |
| 433 a preliminary match using a DFA (or simulation) on a modified RE | 433 a preliminary match using a DFA (or simulation) on a modified RE |
| 434 that just replicates subREs for \<digit>, and then doing a | 434 that just replicates subREs for \<digit>, and then doing a |
| 435 backtracking match to determine whether the subRE matches were | 435 backtracking match to determine whether the subRE matches were |
| 436 right. This can be rather slow, but I console myself with the | 436 right. This can be rather slow, but I console myself with the |
| (...skipping 312 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 749 to take this transition instead of the first one, so we | 749 to take this transition instead of the first one, so we |
| 750 push this transition in the backtracking stack so we can | 750 push this transition in the backtracking stack so we can |
| 751 jump back here if needed. */ | 751 jump back here if needed. */ |
| 752 BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id, | 752 BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id, |
| 753 next_c, tags, mbstate); | 753 next_c, tags, mbstate); |
| 754 { | 754 { |
| 755 int* tmp; | 755 int* tmp; |
| 756 for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) | 756 for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) |
| 757 stack->item.tags[*tmp] = pos; | 757 stack->item.tags[*tmp] = pos; |
| 758 } | 758 } |
| 759 #if 0 /* XXX - it's important not to look at all transitions here to keep \ | |
| 760 the stack small! */ | |
| 761 break; | |
| 762 #endif | |
| 763 } | 759 } |
| 764 } | 760 } |
| 765 } | 761 } |
| 766 | 762 |
| 767 if (next_state != NULL) { | 763 if (next_state != NULL) { |
| 768 /* Matching transitions were found. Take the first one. */ | 764 /* Matching transitions were found. Take the first one. */ |
| 769 state = next_state; | 765 state = next_state; |
| 770 | 766 |
| 771 /* Update the tag values. */ | 767 /* Update the tag values. */ |
| 772 if (next_tags) | 768 if (next_tags) |
| (...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 909 status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); | 905 status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); |
| 910 } | 906 } |
| 911 | 907 |
| 912 if (status == REG_OK) | 908 if (status == REG_OK) |
| 913 /* A match was found, so fill the submatch registers. */ | 909 /* A match was found, so fill the submatch registers. */ |
| 914 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); | 910 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); |
| 915 if (tags) | 911 if (tags) |
| 916 xfree(tags); | 912 xfree(tags); |
| 917 return status; | 913 return status; |
| 918 } | 914 } |
| OLD | NEW |