| OLD | NEW |
| 1 /* | 1 /* |
| 2 regexec.c - TRE POSIX compatible matching functions (and more). | 2 regexec.c - TRE POSIX compatible matching functions (and more). |
| 3 | 3 |
| 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> | 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> |
| 5 All rights reserved. | 5 All rights reserved. |
| 6 | 6 |
| 7 Redistribution and use in source and binary forms, with or without | 7 Redistribution and use in source and binary forms, with or without |
| 8 modification, are permitted provided that the following conditions | 8 modification, are permitted provided that the following conditions |
| 9 are met: | 9 are met: |
| 10 | 10 |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 #include <wchar.h> | 34 #include <wchar.h> |
| 35 #include <wctype.h> | 35 #include <wctype.h> |
| 36 #include <limits.h> | 36 #include <limits.h> |
| 37 | 37 |
| 38 #include <regex.h> | 38 #include <regex.h> |
| 39 | 39 |
| 40 #include "tre.h" | 40 #include "tre.h" |
| 41 | 41 |
| 42 #include <assert.h> | 42 #include <assert.h> |
| 43 | 43 |
| 44 static void | 44 static void tre_fill_pmatch(size_t nmatch, |
| 45 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, | 45 regmatch_t pmatch[], |
| 46 » » const tre_tnfa_t *tnfa, int *tags, int match_eo); | 46 int cflags, |
| 47 const tre_tnfa_t* tnfa, |
| 48 int* tags, |
| 49 int match_eo); |
| 47 | 50 |
| 48 /*********************************************************************** | 51 /*********************************************************************** |
| 49 from tre-match-utils.h | 52 from tre-match-utils.h |
| 50 ***********************************************************************/ | 53 ***********************************************************************/ |
| 51 | 54 |
| 52 #define GET_NEXT_WCHAR() do { \ | 55 #define GET_NEXT_WCHAR() \ |
| 53 prev_c = next_c; pos += pos_add_next; \ | 56 do { \ |
| 54 if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \ | 57 prev_c = next_c; \ |
| 55 if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \ | 58 pos += pos_add_next; \ |
| 56 else pos_add_next++; \ | 59 if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \ |
| 57 } \ | 60 if (pos_add_next < 0) { \ |
| 58 str_byte += pos_add_next; \ | 61 ret = REG_NOMATCH; \ |
| 62 goto error_exit; \ |
| 63 } else \ |
| 64 pos_add_next++; \ |
| 65 } \ |
| 66 str_byte += pos_add_next; \ |
| 59 } while (0) | 67 } while (0) |
| 60 | 68 |
| 61 #define IS_WORD_CHAR(c)» ((c) == L'_' || tre_isalnum(c)) | 69 #define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) |
| 62 | 70 |
| 63 #define CHECK_ASSERTIONS(assertions)» » » » » \ | 71 #define CHECK_ASSERTIONS(assertions) \ |
| 64 (((assertions & ASSERT_AT_BOL)» » » » » \ | 72 (((assertions & ASSERT_AT_BOL) && (pos > 0 || reg_notbol) && \ |
| 65 && (pos > 0 || reg_notbol)» » » » » » \ | 73 (prev_c != L'\n' || !reg_newline)) || \ |
| 66 && (prev_c != L'\n' || !reg_newline))» » » » \ | 74 ((assertions & ASSERT_AT_EOL) && (next_c != L'\0' || reg_noteol) && \ |
| 67 || ((assertions & ASSERT_AT_EOL)» » » » » \ | 75 (next_c != L'\n' || !reg_newline)) || \ |
| 68 && (next_c != L'\0' || reg_noteol)» » » » \ | 76 ((assertions & ASSERT_AT_BOW) && \ |
| 69 && (next_c != L'\n' || !reg_newline))» » » » \ | 77 (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) || \ |
| 70 || ((assertions & ASSERT_AT_BOW)» » » » » \ | 78 ((assertions & ASSERT_AT_EOW) && \ |
| 71 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))» \ | 79 (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) || \ |
| 72 || ((assertions & ASSERT_AT_EOW)» » » » » \ | 80 ((assertions & ASSERT_AT_WB) && \ |
| 73 && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))» » \ | 81 (pos != 0 && next_c != L'\0' && \ |
| 74 || ((assertions & ASSERT_AT_WB)» » » » » \ | 82 IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) || \ |
| 75 && (pos != 0 && next_c != L'\0'» » » » » \ | 83 ((assertions & ASSERT_AT_WB_NEG) && \ |
| 76 » && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))» » \ | 84 (pos == 0 || next_c == L'\0' || \ |
| 77 || ((assertions & ASSERT_AT_WB_NEG)» » » » » \ | 85 IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) |
| 78 && (pos == 0 || next_c == L'\0'» » » » » \ | |
| 79 » || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) | |
| 80 | 86 |
| 81 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ | 87 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ |
| 82 (((trans_i->assertions & ASSERT_CHAR_CLASS) \ | 88 (((trans_i->assertions & ASSERT_CHAR_CLASS) && \ |
| 83 && !(tnfa->cflags & REG_ICASE) \ | 89 !(tnfa->cflags & REG_ICASE) && \ |
| 84 && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ | 90 !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) || \ |
| 85 || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ | 91 ((trans_i->assertions & ASSERT_CHAR_CLASS) && (tnfa->cflags & REG_ICASE) && \ |
| 86 && (tnfa->cflags & REG_ICASE) \ | 92 !tre_isctype(tre_tolower((tre_cint_t)prev_c), trans_i->u.class) && \ |
| 87 && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ | 93 !tre_isctype(tre_toupper((tre_cint_t)prev_c), trans_i->u.class)) || \ |
| 88 » && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ | 94 ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) && \ |
| 89 || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ | 95 tre_neg_char_classes_match(trans_i->neg_classes, (tre_cint_t)prev_c, \ |
| 90 && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ | 96 tnfa->cflags & REG_ICASE))) |
| 91 tnfa->cflags & REG_ICASE))) | |
| 92 | |
| 93 | |
| 94 | |
| 95 | 97 |
| 96 /* Returns 1 if `t1' wins `t2', 0 otherwise. */ | 98 /* Returns 1 if `t1' wins `t2', 0 otherwise. */ |
| 97 static int | 99 static int tre_tag_order(int num_tags, |
| 98 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, | 100 tre_tag_direction_t* tag_directions, |
| 99 » int *t1, int *t2) | 101 int* t1, |
| 100 { | 102 int* t2) { |
| 101 int i; | 103 int i; |
| 102 for (i = 0; i < num_tags; i++) | 104 for (i = 0; i < num_tags; i++) { |
| 103 { | 105 if (tag_directions[i] == TRE_TAG_MINIMIZE) { |
| 104 if (tag_directions[i] == TRE_TAG_MINIMIZE) | 106 if (t1[i] < t2[i]) |
| 105 » { | 107 return 1; |
| 106 » if (t1[i] < t2[i]) | 108 if (t1[i] > t2[i]) |
| 107 » return 1; | 109 return 0; |
| 108 » if (t1[i] > t2[i]) | 110 } else { |
| 109 » return 0; | 111 if (t1[i] > t2[i]) |
| 110 » } | 112 return 1; |
| 111 else | 113 if (t1[i] < t2[i]) |
| 112 » { | 114 return 0; |
| 113 » if (t1[i] > t2[i]) | |
| 114 » return 1; | |
| 115 » if (t1[i] < t2[i]) | |
| 116 » return 0; | |
| 117 » } | |
| 118 } | 115 } |
| 116 } |
| 119 /* assert(0);*/ | 117 /* assert(0);*/ |
| 120 return 0; | 118 return 0; |
| 121 } | 119 } |
| 122 | 120 |
| 123 static int | 121 static int tre_neg_char_classes_match(tre_ctype_t* classes, |
| 124 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) | 122 tre_cint_t wc, |
| 125 { | 123 int icase) { |
| 126 while (*classes != (tre_ctype_t)0) | 124 while (*classes != (tre_ctype_t)0) |
| 127 if ((!icase && tre_isctype(wc, *classes)) | 125 if ((!icase && tre_isctype(wc, *classes)) || |
| 128 » || (icase && (tre_isctype(tre_toupper(wc), *classes) | 126 (icase && (tre_isctype(tre_toupper(wc), *classes) || |
| 129 » » || tre_isctype(tre_tolower(wc), *classes)))) | 127 tre_isctype(tre_tolower(wc), *classes)))) |
| 130 return 1; /* Match. */ | 128 return 1; /* Match. */ |
| 131 else | 129 else |
| 132 classes++; | 130 classes++; |
| 133 return 0; /* No match. */ | 131 return 0; /* No match. */ |
| 134 } | 132 } |
| 135 | 133 |
| 136 | |
| 137 /*********************************************************************** | 134 /*********************************************************************** |
| 138 from tre-match-parallel.c | 135 from tre-match-parallel.c |
| 139 ***********************************************************************/ | 136 ***********************************************************************/ |
| 140 | 137 |
| 141 /* | 138 /* |
| 142 This algorithm searches for matches basically by reading characters | 139 This algorithm searches for matches basically by reading characters |
| 143 in the searched string one by one, starting at the beginning. All | 140 in the searched string one by one, starting at the beginning. All |
| 144 matching paths in the TNFA are traversed in parallel. When two or | 141 matching paths in the TNFA are traversed in parallel. When two or |
| 145 more paths reach the same state, exactly one is chosen according to | 142 more paths reach the same state, exactly one is chosen according to |
| 146 tag ordering rules; if returning submatches is not required it does | 143 tag ordering rules; if returning submatches is not required it does |
| 147 not matter which path is chosen. | 144 not matter which path is chosen. |
| 148 | 145 |
| 149 The worst case time required for finding the leftmost and longest | 146 The worst case time required for finding the leftmost and longest |
| 150 match, or determining that there is no match, is always linearly | 147 match, or determining that there is no match, is always linearly |
| 151 dependent on the length of the text being searched. | 148 dependent on the length of the text being searched. |
| 152 | 149 |
| 153 This algorithm cannot handle TNFAs with back referencing nodes. | 150 This algorithm cannot handle TNFAs with back referencing nodes. |
| 154 See `tre-match-backtrack.c'. | 151 See `tre-match-backtrack.c'. |
| 155 */ | 152 */ |
| 156 | 153 |
| 157 typedef struct { | 154 typedef struct { |
| 158 tre_tnfa_transition_t *state; | 155 tre_tnfa_transition_t* state; |
| 159 int *tags; | 156 int* tags; |
| 160 } tre_tnfa_reach_t; | 157 } tre_tnfa_reach_t; |
| 161 | 158 |
| 162 typedef struct { | 159 typedef struct { |
| 163 int pos; | 160 int pos; |
| 164 int **tags; | 161 int** tags; |
| 165 } tre_reach_pos_t; | 162 } tre_reach_pos_t; |
| 166 | 163 |
| 167 | 164 static reg_errcode_t tre_tnfa_run_parallel(const tre_tnfa_t* tnfa, |
| 168 static reg_errcode_t | 165 const void* string, |
| 169 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, | 166 int* match_tags, |
| 170 » » int *match_tags, int eflags, | 167 int eflags, |
| 171 » » int *match_end_ofs) | 168 int* match_end_ofs) { |
| 172 { | |
| 173 /* State variables required by GET_NEXT_WCHAR. */ | 169 /* State variables required by GET_NEXT_WCHAR. */ |
| 174 tre_char_t prev_c = 0, next_c = 0; | 170 tre_char_t prev_c = 0, next_c = 0; |
| 175 const char *str_byte = string; | 171 const char* str_byte = string; |
| 176 int pos = -1; | 172 int pos = -1; |
| 177 int pos_add_next = 1; | 173 int pos_add_next = 1; |
| 178 #ifdef TRE_MBSTATE | 174 #ifdef TRE_MBSTATE |
| 179 mbstate_t mbstate; | 175 mbstate_t mbstate; |
| 180 #endif /* TRE_MBSTATE */ | 176 #endif /* TRE_MBSTATE */ |
| 181 int reg_notbol = eflags & REG_NOTBOL; | 177 int reg_notbol = eflags & REG_NOTBOL; |
| 182 int reg_noteol = eflags & REG_NOTEOL; | 178 int reg_noteol = eflags & REG_NOTEOL; |
| 183 int reg_newline = tnfa->cflags & REG_NEWLINE; | 179 int reg_newline = tnfa->cflags & REG_NEWLINE; |
| 184 reg_errcode_t ret; | 180 reg_errcode_t ret; |
| 185 | 181 |
| 186 char *buf; | 182 char* buf; |
| 187 tre_tnfa_transition_t *trans_i; | 183 tre_tnfa_transition_t* trans_i; |
| 188 tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; | 184 tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; |
| 189 tre_reach_pos_t *reach_pos; | 185 tre_reach_pos_t* reach_pos; |
| 190 int *tag_i; | 186 int* tag_i; |
| 191 int num_tags, i; | 187 int num_tags, i; |
| 192 | 188 |
| 193 int match_eo = -1;» /* end offset of match (-1 if no match found yet) */ | 189 int match_eo = -1; /* end offset of match (-1 if no match found yet) */ |
| 194 int new_match = 0; | 190 int new_match = 0; |
| 195 int *tmp_tags = NULL; | 191 int* tmp_tags = NULL; |
| 196 int *tmp_iptr; | 192 int* tmp_iptr; |
| 197 | 193 |
| 198 #ifdef TRE_MBSTATE | 194 #ifdef TRE_MBSTATE |
| 199 memset(&mbstate, '\0', sizeof(mbstate)); | 195 memset(&mbstate, '\0', sizeof(mbstate)); |
| 200 #endif /* TRE_MBSTATE */ | 196 #endif /* TRE_MBSTATE */ |
| 201 | 197 |
| 202 if (!match_tags) | 198 if (!match_tags) |
| 203 num_tags = 0; | 199 num_tags = 0; |
| 204 else | 200 else |
| 205 num_tags = tnfa->num_tags; | 201 num_tags = tnfa->num_tags; |
| 206 | 202 |
| 207 /* Allocate memory for temporary data required for matching. This needs to | 203 /* Allocate memory for temporary data required for matching. This needs to |
| 208 be done for every matching operation to be thread safe. This allocates | 204 be done for every matching operation to be thread safe. This allocates |
| 209 everything in a single large block from the stack frame using alloca() | 205 everything in a single large block from the stack frame using alloca() |
| 210 or with malloc() if alloca is unavailable. */ | 206 or with malloc() if alloca is unavailable. */ |
| 211 { | 207 { |
| 212 int tbytes, rbytes, pbytes, xbytes, total_bytes; | 208 int tbytes, rbytes, pbytes, xbytes, total_bytes; |
| 213 char *tmp_buf; | 209 char* tmp_buf; |
| 214 /* Compute the length of the block we need. */ | 210 /* Compute the length of the block we need. */ |
| 215 tbytes = sizeof(*tmp_tags) * num_tags; | 211 tbytes = sizeof(*tmp_tags) * num_tags; |
| 216 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); | 212 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); |
| 217 pbytes = sizeof(*reach_pos) * tnfa->num_states; | 213 pbytes = sizeof(*reach_pos) * tnfa->num_states; |
| 218 xbytes = sizeof(int) * num_tags; | 214 xbytes = sizeof(int) * num_tags; |
| 219 total_bytes = | 215 total_bytes = (sizeof(long) - 1) * 4 /* for alignment paddings */ |
| 220 (sizeof(long) - 1) * 4 /* for alignment paddings */ | 216 + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes; |
| 221 + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes; | |
| 222 | 217 |
| 223 /* Allocate the memory. */ | 218 /* Allocate the memory. */ |
| 224 buf = xmalloc((unsigned)total_bytes); | 219 buf = xmalloc((unsigned)total_bytes); |
| 225 if (buf == NULL) | 220 if (buf == NULL) |
| 226 return REG_ESPACE; | 221 return REG_ESPACE; |
| 227 memset(buf, 0, (size_t)total_bytes); | 222 memset(buf, 0, (size_t)total_bytes); |
| 228 | 223 |
| 229 /* Get the various pointers within tmp_buf (properly aligned). */ | 224 /* Get the various pointers within tmp_buf (properly aligned). */ |
| 230 tmp_tags = (void *)buf; | 225 tmp_tags = (void*)buf; |
| 231 tmp_buf = buf + tbytes; | 226 tmp_buf = buf + tbytes; |
| 232 tmp_buf += ALIGN(tmp_buf, long); | 227 tmp_buf += ALIGN(tmp_buf, long); |
| 233 reach_next = (void *)tmp_buf; | 228 reach_next = (void*)tmp_buf; |
| 234 tmp_buf += rbytes; | 229 tmp_buf += rbytes; |
| 235 tmp_buf += ALIGN(tmp_buf, long); | 230 tmp_buf += ALIGN(tmp_buf, long); |
| 236 reach = (void *)tmp_buf; | 231 reach = (void*)tmp_buf; |
| 237 tmp_buf += rbytes; | 232 tmp_buf += rbytes; |
| 238 tmp_buf += ALIGN(tmp_buf, long); | 233 tmp_buf += ALIGN(tmp_buf, long); |
| 239 reach_pos = (void *)tmp_buf; | 234 reach_pos = (void*)tmp_buf; |
| 240 tmp_buf += pbytes; | 235 tmp_buf += pbytes; |
| 241 tmp_buf += ALIGN(tmp_buf, long); | 236 tmp_buf += ALIGN(tmp_buf, long); |
| 242 for (i = 0; i < tnfa->num_states; i++) | 237 for (i = 0; i < tnfa->num_states; i++) { |
| 243 { | 238 reach[i].tags = (void*)tmp_buf; |
| 244 » reach[i].tags = (void *)tmp_buf; | 239 tmp_buf += xbytes; |
| 245 » tmp_buf += xbytes; | 240 reach_next[i].tags = (void*)tmp_buf; |
| 246 » reach_next[i].tags = (void *)tmp_buf; | 241 tmp_buf += xbytes; |
| 247 » tmp_buf += xbytes; | 242 } |
| 248 } | |
| 249 } | 243 } |
| 250 | 244 |
| 251 for (i = 0; i < tnfa->num_states; i++) | 245 for (i = 0; i < tnfa->num_states; i++) |
| 252 reach_pos[i].pos = -1; | 246 reach_pos[i].pos = -1; |
| 253 | 247 |
| 254 GET_NEXT_WCHAR(); | 248 GET_NEXT_WCHAR(); |
| 255 pos = 0; | 249 pos = 0; |
| 256 | 250 |
| 257 reach_next_i = reach_next; | 251 reach_next_i = reach_next; |
| 258 while (1) | 252 while (1) { |
| 259 { | 253 /* If no match found yet, add the initial states to `reach_next'. */ |
| 260 /* If no match found yet, add the initial states to `reach_next'. */ | 254 if (match_eo < 0) { |
| 261 if (match_eo < 0) | 255 trans_i = tnfa->initial; |
| 262 » { | 256 while (trans_i->state != NULL) { |
| 263 » trans_i = tnfa->initial; | 257 if (reach_pos[trans_i->state_id].pos < pos) { |
| 264 » while (trans_i->state != NULL) | 258 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) { |
| 265 » { | 259 trans_i++; |
| 266 » if (reach_pos[trans_i->state_id].pos < pos) | 260 continue; |
| 267 » » { | 261 } |
| 268 » » if (trans_i->assertions | |
| 269 » » && CHECK_ASSERTIONS(trans_i->assertions)) | |
| 270 » » { | |
| 271 » » trans_i++; | |
| 272 » » continue; | |
| 273 » » } | |
| 274 | 262 |
| 275 » » reach_next_i->state = trans_i->state; | 263 reach_next_i->state = trans_i->state; |
| 276 » » for (i = 0; i < num_tags; i++) | 264 for (i = 0; i < num_tags; i++) |
| 277 » » reach_next_i->tags[i] = -1; | 265 reach_next_i->tags[i] = -1; |
| 278 » » tag_i = trans_i->tags; | 266 tag_i = trans_i->tags; |
| 279 » » if (tag_i) | 267 if (tag_i) |
| 280 » » while (*tag_i >= 0) | 268 while (*tag_i >= 0) { |
| 281 » » { | 269 if (*tag_i < num_tags) |
| 282 » » » if (*tag_i < num_tags) | 270 reach_next_i->tags[*tag_i] = pos; |
| 283 » » » reach_next_i->tags[*tag_i] = pos; | 271 tag_i++; |
| 284 » » » tag_i++; | 272 } |
| 285 » » } | 273 if (reach_next_i->state == tnfa->final) { |
| 286 » » if (reach_next_i->state == tnfa->final) | 274 match_eo = pos; |
| 287 » » { | 275 new_match = 1; |
| 288 » » match_eo = pos; | 276 for (i = 0; i < num_tags; i++) |
| 289 » » new_match = 1; | 277 match_tags[i] = reach_next_i->tags[i]; |
| 290 » » for (i = 0; i < num_tags; i++) | 278 } |
| 291 » » » match_tags[i] = reach_next_i->tags[i]; | 279 reach_pos[trans_i->state_id].pos = pos; |
| 292 » » } | 280 reach_pos[trans_i->state_id].tags = &reach_next_i->tags; |
| 293 » » reach_pos[trans_i->state_id].pos = pos; | 281 reach_next_i++; |
| 294 » » reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | 282 } |
| 295 » » reach_next_i++; | 283 trans_i++; |
| 296 » » } | 284 } |
| 297 » trans_i++; | 285 reach_next_i->state = NULL; |
| 298 » } | 286 } else { |
| 299 » reach_next_i->state = NULL; | 287 if (num_tags == 0 || reach_next_i == reach_next) |
| 300 » } | 288 /* We have found a match. */ |
| 301 else | 289 break; |
| 302 » { | 290 } |
| 303 » if (num_tags == 0 || reach_next_i == reach_next) | |
| 304 » /* We have found a match. */ | |
| 305 » break; | |
| 306 » } | |
| 307 | 291 |
| 308 /* Check for end of string. */ | 292 /* Check for end of string. */ |
| 309 if (!next_c) break; | 293 if (!next_c) |
| 294 break; |
| 310 | 295 |
| 311 GET_NEXT_WCHAR(); | 296 GET_NEXT_WCHAR(); |
| 297 |
| 298 /* Swap `reach' and `reach_next'. */ |
| 299 reach_i = reach; |
| 300 reach = reach_next; |
| 301 reach_next = reach_i; |
| 302 |
| 303 /* For each state in `reach', weed out states that don't fulfill the |
| 304 minimal matching conditions. */ |
| 305 if (tnfa->num_minimals && new_match) { |
| 306 new_match = 0; |
| 307 reach_next_i = reach_next; |
| 308 for (reach_i = reach; reach_i->state; reach_i++) { |
| 309 int skip = 0; |
| 310 for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) { |
| 311 int end = tnfa->minimal_tags[i]; |
| 312 int start = tnfa->minimal_tags[i + 1]; |
| 313 if (end >= num_tags) { |
| 314 skip = 1; |
| 315 break; |
| 316 } else if (reach_i->tags[start] == match_tags[start] && |
| 317 reach_i->tags[end] < match_tags[end]) { |
| 318 skip = 1; |
| 319 break; |
| 320 } |
| 321 } |
| 322 if (!skip) { |
| 323 reach_next_i->state = reach_i->state; |
| 324 tmp_iptr = reach_next_i->tags; |
| 325 reach_next_i->tags = reach_i->tags; |
| 326 reach_i->tags = tmp_iptr; |
| 327 reach_next_i++; |
| 328 } |
| 329 } |
| 330 reach_next_i->state = NULL; |
| 312 | 331 |
| 313 /* Swap `reach' and `reach_next'. */ | 332 /* Swap `reach' and `reach_next'. */ |
| 314 reach_i = reach; | 333 reach_i = reach; |
| 315 reach = reach_next; | 334 reach = reach_next; |
| 316 reach_next = reach_i; | 335 reach_next = reach_i; |
| 336 } |
| 317 | 337 |
| 318 /* For each state in `reach', weed out states that don't fulfill the | 338 /* For each state in `reach' see if there is a transition leaving with |
| 319 » minimal matching conditions. */ | 339 the current input symbol to a state not yet in `reach_next', and |
| 320 if (tnfa->num_minimals && new_match) | 340 add the destination states to `reach_next'. */ |
| 321 » { | 341 reach_next_i = reach_next; |
| 322 » new_match = 0; | 342 for (reach_i = reach; reach_i->state; reach_i++) { |
| 323 » reach_next_i = reach_next; | 343 for (trans_i = reach_i->state; trans_i->state; trans_i++) { |
| 324 » for (reach_i = reach; reach_i->state; reach_i++) | 344 /* Does this transition match the input symbol? */ |
| 325 » { | 345 if (trans_i->code_min <= (tre_cint_t)prev_c && |
| 326 » int skip = 0; | 346 trans_i->code_max >= (tre_cint_t)prev_c) { |
| 327 » for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) | 347 if (trans_i->assertions && |
| 328 » » { | 348 (CHECK_ASSERTIONS(trans_i->assertions) || |
| 329 » » int end = tnfa->minimal_tags[i]; | 349 CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) { |
| 330 » » int start = tnfa->minimal_tags[i + 1]; | 350 continue; |
| 331 » » if (end >= num_tags) | 351 } |
| 332 » » { | |
| 333 » » skip = 1; | |
| 334 » » break; | |
| 335 » » } | |
| 336 » » else if (reach_i->tags[start] == match_tags[start] | |
| 337 » » » && reach_i->tags[end] < match_tags[end]) | |
| 338 » » { | |
| 339 » » skip = 1; | |
| 340 » » break; | |
| 341 » » } | |
| 342 » » } | |
| 343 » if (!skip) | |
| 344 » » { | |
| 345 » » reach_next_i->state = reach_i->state; | |
| 346 » » tmp_iptr = reach_next_i->tags; | |
| 347 » » reach_next_i->tags = reach_i->tags; | |
| 348 » » reach_i->tags = tmp_iptr; | |
| 349 » » reach_next_i++; | |
| 350 » » } | |
| 351 » } | |
| 352 » reach_next_i->state = NULL; | |
| 353 | 352 |
| 354 » /* Swap `reach' and `reach_next'. */ | 353 /* Compute the tags after this transition. */ |
| 355 » reach_i = reach; | 354 for (i = 0; i < num_tags; i++) |
| 356 » reach = reach_next; | 355 tmp_tags[i] = reach_i->tags[i]; |
| 357 » reach_next = reach_i; | 356 tag_i = trans_i->tags; |
| 358 » } | 357 if (tag_i != NULL) |
| 358 while (*tag_i >= 0) { |
| 359 if (*tag_i < num_tags) |
| 360 tmp_tags[*tag_i] = pos; |
| 361 tag_i++; |
| 362 } |
| 359 | 363 |
| 360 /* For each state in `reach' see if there is a transition leaving with | 364 if (reach_pos[trans_i->state_id].pos < pos) { |
| 361 » the current input symbol to a state not yet in `reach_next', and | 365 /* Found an unvisited node. */ |
| 362 » add the destination states to `reach_next'. */ | 366 reach_next_i->state = trans_i->state; |
| 363 reach_next_i = reach_next; | 367 tmp_iptr = reach_next_i->tags; |
| 364 for (reach_i = reach; reach_i->state; reach_i++) | 368 reach_next_i->tags = tmp_tags; |
| 365 » { | 369 tmp_tags = tmp_iptr; |
| 366 » for (trans_i = reach_i->state; trans_i->state; trans_i++) | 370 reach_pos[trans_i->state_id].pos = pos; |
| 367 » { | 371 reach_pos[trans_i->state_id].tags = &reach_next_i->tags; |
| 368 » /* Does this transition match the input symbol? */ | |
| 369 » if (trans_i->code_min <= (tre_cint_t)prev_c && | |
| 370 » » trans_i->code_max >= (tre_cint_t)prev_c) | |
| 371 » » { | |
| 372 » » if (trans_i->assertions | |
| 373 » » && (CHECK_ASSERTIONS(trans_i->assertions) | |
| 374 » » » || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) | |
| 375 » » { | |
| 376 » » continue; | |
| 377 » » } | |
| 378 | 372 |
| 379 » » /* Compute the tags after this transition. */ | 373 if (reach_next_i->state == tnfa->final && |
| 380 » » for (i = 0; i < num_tags; i++) | 374 (match_eo == -1 || |
| 381 » » tmp_tags[i] = reach_i->tags[i]; | 375 (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0]))) { |
| 382 » » tag_i = trans_i->tags; | 376 match_eo = pos; |
| 383 » » if (tag_i != NULL) | 377 new_match = 1; |
| 384 » » while (*tag_i >= 0) | 378 for (i = 0; i < num_tags; i++) |
| 385 » » { | 379 match_tags[i] = reach_next_i->tags[i]; |
| 386 » » » if (*tag_i < num_tags) | 380 } |
| 387 » » » tmp_tags[*tag_i] = pos; | 381 reach_next_i++; |
| 388 » » » tag_i++; | |
| 389 » » } | |
| 390 | 382 |
| 391 » » if (reach_pos[trans_i->state_id].pos < pos) | 383 } else { |
| 392 » » { | 384 assert(reach_pos[trans_i->state_id].pos == pos); |
| 393 » » /* Found an unvisited node. */ | 385 /* Another path has also reached this state. We choose |
| 394 » » reach_next_i->state = trans_i->state; | 386 the winner by examining the tag values for both |
| 395 » » tmp_iptr = reach_next_i->tags; | 387 paths. */ |
| 396 » » reach_next_i->tags = tmp_tags; | 388 if (tre_tag_order(num_tags, tnfa->tag_directions, tmp_tags, |
| 397 » » tmp_tags = tmp_iptr; | 389 *reach_pos[trans_i->state_id].tags)) { |
| 398 » » reach_pos[trans_i->state_id].pos = pos; | 390 /* The new path wins. */ |
| 399 » » reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | 391 tmp_iptr = *reach_pos[trans_i->state_id].tags; |
| 400 | 392 *reach_pos[trans_i->state_id].tags = tmp_tags; |
| 401 » » if (reach_next_i->state == tnfa->final | 393 if (trans_i->state == tnfa->final) { |
| 402 » » » && (match_eo == -1 | 394 match_eo = pos; |
| 403 » » » || (num_tags > 0 | 395 new_match = 1; |
| 404 » » » » && reach_next_i->tags[0] <= match_tags[0]))) | 396 for (i = 0; i < num_tags; i++) |
| 405 » » » { | 397 match_tags[i] = tmp_tags[i]; |
| 406 » » » match_eo = pos; | 398 } |
| 407 » » » new_match = 1; | 399 tmp_tags = tmp_iptr; |
| 408 » » » for (i = 0; i < num_tags; i++) | 400 } |
| 409 » » » match_tags[i] = reach_next_i->tags[i]; | 401 } |
| 410 » » » } | 402 } |
| 411 » » reach_next_i++; | 403 } |
| 412 | |
| 413 » » } | |
| 414 » » else | |
| 415 » » { | |
| 416 » » assert(reach_pos[trans_i->state_id].pos == pos); | |
| 417 » » /* Another path has also reached this state. We choose | |
| 418 » » » the winner by examining the tag values for both | |
| 419 » » » paths. */ | |
| 420 » » if (tre_tag_order(num_tags, tnfa->tag_directions, | |
| 421 » » » » » tmp_tags, | |
| 422 » » » » » *reach_pos[trans_i->state_id].tags)) | |
| 423 » » » { | |
| 424 » » » /* The new path wins. */ | |
| 425 » » » tmp_iptr = *reach_pos[trans_i->state_id].tags; | |
| 426 » » » *reach_pos[trans_i->state_id].tags = tmp_tags; | |
| 427 » » » if (trans_i->state == tnfa->final) | |
| 428 » » » { | |
| 429 » » » match_eo = pos; | |
| 430 » » » new_match = 1; | |
| 431 » » » for (i = 0; i < num_tags; i++) | |
| 432 » » » » match_tags[i] = tmp_tags[i]; | |
| 433 » » » } | |
| 434 » » » tmp_tags = tmp_iptr; | |
| 435 » » » } | |
| 436 » » } | |
| 437 » » } | |
| 438 » } | |
| 439 » } | |
| 440 reach_next_i->state = NULL; | |
| 441 } | 404 } |
| 405 reach_next_i->state = NULL; |
| 406 } |
| 442 | 407 |
| 443 *match_end_ofs = match_eo; | 408 *match_end_ofs = match_eo; |
| 444 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; | 409 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; |
| 445 error_exit: | 410 error_exit: |
| 446 xfree(buf); | 411 xfree(buf); |
| 447 return ret; | 412 return ret; |
| 448 } | 413 } |
| 449 | 414 |
| 450 | |
| 451 | |
| 452 /*********************************************************************** | 415 /*********************************************************************** |
| 453 from tre-match-backtrack.c | 416 from tre-match-backtrack.c |
| 454 ***********************************************************************/ | 417 ***********************************************************************/ |
| 455 | 418 |
| 456 /* | 419 /* |
| 457 This matcher is for regexps that use back referencing. Regexp matching | 420 This matcher is for regexps that use back referencing. Regexp matching |
| 458 with back referencing is an NP-complete problem on the number of back | 421 with back referencing is an NP-complete problem on the number of back |
| 459 references. The easiest way to match them is to use a backtracking | 422 references. The easiest way to match them is to use a backtracking |
| 460 routine which basically goes through all possible paths in the TNFA | 423 routine which basically goes through all possible paths in the TNFA |
| 461 and chooses the one which results in the best (leftmost and longest) | 424 and chooses the one which results in the best (leftmost and longest) |
| 462 match. This can be spectacularly expensive and may run out of stack | 425 match. This can be spectacularly expensive and may run out of stack |
| 463 space, but there really is no better known generic algorithm. Quoting | 426 space, but there really is no better known generic algorithm. Quoting |
| 464 Henry Spencer from comp.compilers: | 427 Henry Spencer from comp.compilers: |
| 465 <URL: http://compilers.iecc.com/comparch/article/93-03-102> | 428 <URL: http://compilers.iecc.com/comparch/article/93-03-102> |
| 466 | 429 |
| 467 POSIX.2 REs require longest match, which is really exciting to | 430 POSIX.2 REs require longest match, which is really exciting to |
| 468 implement since the obsolete ("basic") variant also includes | 431 implement since the obsolete ("basic") variant also includes |
| 469 \<digit>. I haven't found a better way of tackling this than doing | 432 \<digit>. I haven't found a better way of tackling this than doing |
| 470 a preliminary match using a DFA (or simulation) on a modified RE | 433 a preliminary match using a DFA (or simulation) on a modified RE |
| 471 that just replicates subREs for \<digit>, and then doing a | 434 that just replicates subREs for \<digit>, and then doing a |
| 472 backtracking match to determine whether the subRE matches were | 435 backtracking match to determine whether the subRE matches were |
| 473 right. This can be rather slow, but I console myself with the | 436 right. This can be rather slow, but I console myself with the |
| 474 thought that people who use \<digit> deserve very slow execution. | 437 thought that people who use \<digit> deserve very slow execution. |
| 475 (Pun unintentional but very appropriate.) | 438 (Pun unintentional but very appropriate.) |
| 476 | 439 |
| 477 */ | 440 */ |
| 478 | 441 |
| 479 typedef struct { | 442 typedef struct { |
| 480 int pos; | 443 int pos; |
| 481 const char *str_byte; | 444 const char* str_byte; |
| 482 tre_tnfa_transition_t *state; | 445 tre_tnfa_transition_t* state; |
| 483 int state_id; | 446 int state_id; |
| 484 int next_c; | 447 int next_c; |
| 485 int *tags; | 448 int* tags; |
| 486 #ifdef TRE_MBSTATE | 449 #ifdef TRE_MBSTATE |
| 487 mbstate_t mbstate; | 450 mbstate_t mbstate; |
| 488 #endif /* TRE_MBSTATE */ | 451 #endif /* TRE_MBSTATE */ |
| 489 } tre_backtrack_item_t; | 452 } tre_backtrack_item_t; |
| 490 | 453 |
| 491 typedef struct tre_backtrack_struct { | 454 typedef struct tre_backtrack_struct { |
| 492 tre_backtrack_item_t item; | 455 tre_backtrack_item_t item; |
| 493 struct tre_backtrack_struct *prev; | 456 struct tre_backtrack_struct* prev; |
| 494 struct tre_backtrack_struct *next; | 457 struct tre_backtrack_struct* next; |
| 495 } *tre_backtrack_t; | 458 } * tre_backtrack_t; |
| 496 | 459 |
| 497 #ifdef TRE_MBSTATE | 460 #ifdef TRE_MBSTATE |
| 498 #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) | 461 #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) |
| 499 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate | 462 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate |
| 500 #else /* !TRE_MBSTATE */ | 463 #else /* !TRE_MBSTATE */ |
| 501 #define BT_STACK_MBSTATE_IN | 464 #define BT_STACK_MBSTATE_IN |
| 502 #define BT_STACK_MBSTATE_OUT | 465 #define BT_STACK_MBSTATE_OUT |
| 503 #endif /* !TRE_MBSTATE */ | 466 #endif /* !TRE_MBSTATE */ |
| 504 | 467 |
| 505 #define tre_bt_mem_new» » tre_mem_new | 468 #define tre_bt_mem_new tre_mem_new |
| 506 #define tre_bt_mem_alloc» tre_mem_alloc | 469 #define tre_bt_mem_alloc tre_mem_alloc |
| 507 #define tre_bt_mem_destroy» tre_mem_destroy | 470 #define tre_bt_mem_destroy tre_mem_destroy |
| 508 | 471 |
| 472 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, \ |
| 473 _tags, _mbstate) \ |
| 474 do { \ |
| 475 int i; \ |
| 476 if (!stack->next) { \ |
| 477 tre_backtrack_t s; \ |
| 478 s = tre_bt_mem_alloc(mem, sizeof(*s)); \ |
| 479 if (!s) { \ |
| 480 tre_bt_mem_destroy(mem); \ |
| 481 if (tags) \ |
| 482 xfree(tags); \ |
| 483 if (pmatch) \ |
| 484 xfree(pmatch); \ |
| 485 if (states_seen) \ |
| 486 xfree(states_seen); \ |
| 487 return REG_ESPACE; \ |
| 488 } \ |
| 489 s->prev = stack; \ |
| 490 s->next = NULL; \ |
| 491 s->item.tags = tre_bt_mem_alloc(mem, sizeof(*tags) * tnfa->num_tags); \ |
| 492 if (!s->item.tags) { \ |
| 493 tre_bt_mem_destroy(mem); \ |
| 494 if (tags) \ |
| 495 xfree(tags); \ |
| 496 if (pmatch) \ |
| 497 xfree(pmatch); \ |
| 498 if (states_seen) \ |
| 499 xfree(states_seen); \ |
| 500 return REG_ESPACE; \ |
| 501 } \ |
| 502 stack->next = s; \ |
| 503 stack = s; \ |
| 504 } else \ |
| 505 stack = stack->next; \ |
| 506 stack->item.pos = (_pos); \ |
| 507 stack->item.str_byte = (_str_byte); \ |
| 508 stack->item.state = (_state); \ |
| 509 stack->item.state_id = (_state_id); \ |
| 510 stack->item.next_c = (_next_c); \ |
| 511 for (i = 0; i < tnfa->num_tags; i++) \ |
| 512 stack->item.tags[i] = (_tags)[i]; \ |
| 513 BT_STACK_MBSTATE_IN; \ |
| 514 } while (0) |
| 509 | 515 |
| 510 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _t
ags, _mbstate) \ | 516 #define BT_STACK_POP() \ |
| 511 do» » » » » » » » » \ | 517 do { \ |
| 512 {» » » » » » » » » \ | 518 int i; \ |
| 513 int i;» » » » » » » » \ | 519 assert(stack->prev); \ |
| 514 if (!stack->next)»» » » » » » \ | 520 pos = stack->item.pos; \ |
| 515 » {» » » » » » » » \ | 521 str_byte = stack->item.str_byte; \ |
| 516 » tre_backtrack_t s;» » » » » » \ | 522 state = stack->item.state; \ |
| 517 » s = tre_bt_mem_alloc(mem, sizeof(*s));» » » \ | 523 next_c = stack->item.next_c; \ |
| 518 » if (!s)» » » » » » » \ | 524 for (i = 0; i < tnfa->num_tags; i++) \ |
| 519 » {» » » » » » » » \ | 525 tags[i] = stack->item.tags[i]; \ |
| 520 » tre_bt_mem_destroy(mem);» » » » » \ | 526 BT_STACK_MBSTATE_OUT; \ |
| 521 » if (tags)»» » » » » » \ | 527 stack = stack->prev; \ |
| 522 » » xfree(tags);» » » » » » \ | 528 } while (0) |
| 523 » if (pmatch)» » » » » » \ | |
| 524 » » xfree(pmatch);» » » » » » \ | |
| 525 » if (states_seen)» » » » » » \ | |
| 526 » » xfree(states_seen);» » » » » \ | |
| 527 » return REG_ESPACE;» » » » » \ | |
| 528 » }» » » » » » » » \ | |
| 529 » s->prev = stack;» » » » » » \ | |
| 530 » s->next = NULL;» » » » » » \ | |
| 531 » s->item.tags = tre_bt_mem_alloc(mem,» » » » \ | |
| 532 » » » » » sizeof(*tags) * tnfa->num_tags); \ | |
| 533 » if (!s->item.tags)» » » » » » \ | |
| 534 » {» » » » » » » » \ | |
| 535 » tre_bt_mem_destroy(mem);» » » » » \ | |
| 536 » if (tags)»» » » » » » \ | |
| 537 » » xfree(tags);» » » » » » \ | |
| 538 » if (pmatch)» » » » » » \ | |
| 539 » » xfree(pmatch);» » » » » » \ | |
| 540 » if (states_seen)» » » » » » \ | |
| 541 » » xfree(states_seen);» » » » » \ | |
| 542 » return REG_ESPACE;» » » » » \ | |
| 543 » }» » » » » » » » \ | |
| 544 » stack->next = s;» » » » » » \ | |
| 545 » stack = s;» » » » » » » \ | |
| 546 » }» » » » » » » » \ | |
| 547 else» » » » » » » » \ | |
| 548 » stack = stack->next;» » » » » » \ | |
| 549 stack->item.pos = (_pos);»» » » » » \ | |
| 550 stack->item.str_byte = (_str_byte);» » » » \ | |
| 551 stack->item.state = (_state);» » » » » \ | |
| 552 stack->item.state_id = (_state_id);» » » » \ | |
| 553 stack->item.next_c = (_next_c);» » » » » \ | |
| 554 for (i = 0; i < tnfa->num_tags; i++)» » » » \ | |
| 555 » stack->item.tags[i] = (_tags)[i];» » » » \ | |
| 556 BT_STACK_MBSTATE_IN;» » » » » » \ | |
| 557 }» » » » » » » » » \ | |
| 558 while (0) | |
| 559 | |
| 560 #define BT_STACK_POP()» » » » » » » \ | |
| 561 do» » » » » » » » » \ | |
| 562 {» » » » » » » » » \ | |
| 563 int i;» » » » » » » » \ | |
| 564 assert(stack->prev);» » » » » » \ | |
| 565 pos = stack->item.pos;» » » » » » \ | |
| 566 str_byte = stack->item.str_byte;» » » » » \ | |
| 567 state = stack->item.state;» » » » » \ | |
| 568 next_c = stack->item.next_c;» » » » » \ | |
| 569 for (i = 0; i < tnfa->num_tags; i++)» » » » \ | |
| 570 » tags[i] = stack->item.tags[i];» » » » » \ | |
| 571 BT_STACK_MBSTATE_OUT;» » » » » » \ | |
| 572 stack = stack->prev;» » » » » » \ | |
| 573 }» » » » » » » » » \ | |
| 574 while (0) | |
| 575 | 529 |
| 576 #undef MIN | 530 #undef MIN |
| 577 #define MIN(a, b) ((a) <= (b) ? (a) : (b)) | 531 #define MIN(a, b) ((a) <= (b) ? (a) : (b)) |
| 578 | 532 |
| 579 static reg_errcode_t | 533 static reg_errcode_t tre_tnfa_run_backtrack(const tre_tnfa_t* tnfa, |
| 580 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, | 534 const void* string, |
| 581 » » int *match_tags, int eflags, int *match_end_ofs) | 535 int* match_tags, |
| 582 { | 536 int eflags, |
| 537 int* match_end_ofs) { |
| 583 /* State variables required by GET_NEXT_WCHAR. */ | 538 /* State variables required by GET_NEXT_WCHAR. */ |
| 584 tre_char_t prev_c = 0, next_c = 0; | 539 tre_char_t prev_c = 0, next_c = 0; |
| 585 const char *str_byte = string; | 540 const char* str_byte = string; |
| 586 int pos = 0; | 541 int pos = 0; |
| 587 int pos_add_next = 1; | 542 int pos_add_next = 1; |
| 588 #ifdef TRE_MBSTATE | 543 #ifdef TRE_MBSTATE |
| 589 mbstate_t mbstate; | 544 mbstate_t mbstate; |
| 590 #endif /* TRE_MBSTATE */ | 545 #endif /* TRE_MBSTATE */ |
| 591 int reg_notbol = eflags & REG_NOTBOL; | 546 int reg_notbol = eflags & REG_NOTBOL; |
| 592 int reg_noteol = eflags & REG_NOTEOL; | 547 int reg_noteol = eflags & REG_NOTEOL; |
| 593 int reg_newline = tnfa->cflags & REG_NEWLINE; | 548 int reg_newline = tnfa->cflags & REG_NEWLINE; |
| 594 | 549 |
| 595 /* These are used to remember the necessary values of the above | 550 /* These are used to remember the necessary values of the above |
| 596 variables to return to the position where the current search | 551 variables to return to the position where the current search |
| 597 started from. */ | 552 started from. */ |
| 598 int next_c_start; | 553 int next_c_start; |
| 599 const char *str_byte_start; | 554 const char* str_byte_start; |
| 600 int pos_start = -1; | 555 int pos_start = -1; |
| 601 #ifdef TRE_MBSTATE | 556 #ifdef TRE_MBSTATE |
| 602 mbstate_t mbstate_start; | 557 mbstate_t mbstate_start; |
| 603 #endif /* TRE_MBSTATE */ | 558 #endif /* TRE_MBSTATE */ |
| 604 | 559 |
| 605 /* End offset of best match so far, or -1 if no match found yet. */ | 560 /* End offset of best match so far, or -1 if no match found yet. */ |
| 606 int match_eo = -1; | 561 int match_eo = -1; |
| 607 /* Tag arrays. */ | 562 /* Tag arrays. */ |
| 608 int *next_tags, *tags = NULL; | 563 int *next_tags, *tags = NULL; |
| 609 /* Current TNFA state. */ | 564 /* Current TNFA state. */ |
| 610 tre_tnfa_transition_t *state; | 565 tre_tnfa_transition_t* state; |
| 611 int *states_seen = NULL; | 566 int* states_seen = NULL; |
| 612 | 567 |
| 613 /* Memory allocator to for allocating the backtracking stack. */ | 568 /* Memory allocator to for allocating the backtracking stack. */ |
| 614 tre_mem_t mem = tre_bt_mem_new(); | 569 tre_mem_t mem = tre_bt_mem_new(); |
| 615 | 570 |
| 616 /* The backtracking stack. */ | 571 /* The backtracking stack. */ |
| 617 tre_backtrack_t stack; | 572 tre_backtrack_t stack; |
| 618 | 573 |
| 619 tre_tnfa_transition_t *trans_i; | 574 tre_tnfa_transition_t* trans_i; |
| 620 regmatch_t *pmatch = NULL; | 575 regmatch_t* pmatch = NULL; |
| 621 int ret; | 576 int ret; |
| 622 | 577 |
| 623 #ifdef TRE_MBSTATE | 578 #ifdef TRE_MBSTATE |
| 624 memset(&mbstate, '\0', sizeof(mbstate)); | 579 memset(&mbstate, '\0', sizeof(mbstate)); |
| 625 #endif /* TRE_MBSTATE */ | 580 #endif /* TRE_MBSTATE */ |
| 626 | 581 |
| 627 if (!mem) | 582 if (!mem) |
| 628 return REG_ESPACE; | 583 return REG_ESPACE; |
| 629 stack = tre_bt_mem_alloc(mem, sizeof(*stack)); | 584 stack = tre_bt_mem_alloc(mem, sizeof(*stack)); |
| 630 if (!stack) | 585 if (!stack) { |
| 631 { | 586 ret = REG_ESPACE; |
| 587 goto error_exit; |
| 588 } |
| 589 stack->prev = NULL; |
| 590 stack->next = NULL; |
| 591 |
| 592 if (tnfa->num_tags) { |
| 593 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); |
| 594 if (!tags) { |
| 632 ret = REG_ESPACE; | 595 ret = REG_ESPACE; |
| 633 goto error_exit; | 596 goto error_exit; |
| 634 } | 597 } |
| 635 stack->prev = NULL; | 598 } |
| 636 stack->next = NULL; | 599 if (tnfa->num_submatches) { |
| 600 pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches); |
| 601 if (!pmatch) { |
| 602 ret = REG_ESPACE; |
| 603 goto error_exit; |
| 604 } |
| 605 } |
| 606 if (tnfa->num_states) { |
| 607 states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states); |
| 608 if (!states_seen) { |
| 609 ret = REG_ESPACE; |
| 610 goto error_exit; |
| 611 } |
| 612 } |
| 637 | 613 |
| 638 if (tnfa->num_tags) | 614 retry : { |
| 639 { | 615 int i; |
| 640 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); | 616 for (i = 0; i < tnfa->num_tags; i++) { |
| 641 if (!tags) | 617 tags[i] = -1; |
| 642 » { | 618 if (match_tags) |
| 643 » ret = REG_ESPACE; | 619 match_tags[i] = -1; |
| 644 » goto error_exit; | |
| 645 » } | |
| 646 } | |
| 647 if (tnfa->num_submatches) | |
| 648 { | |
| 649 pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches); | |
| 650 if (!pmatch) | |
| 651 » { | |
| 652 » ret = REG_ESPACE; | |
| 653 » goto error_exit; | |
| 654 » } | |
| 655 } | |
| 656 if (tnfa->num_states) | |
| 657 { | |
| 658 states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states); | |
| 659 if (!states_seen) | |
| 660 » { | |
| 661 » ret = REG_ESPACE; | |
| 662 » goto error_exit; | |
| 663 » } | |
| 664 } | |
| 665 | |
| 666 retry: | |
| 667 { | |
| 668 int i; | |
| 669 for (i = 0; i < tnfa->num_tags; i++) | |
| 670 { | |
| 671 » tags[i] = -1; | |
| 672 » if (match_tags) | |
| 673 » match_tags[i] = -1; | |
| 674 } | |
| 675 for (i = 0; i < tnfa->num_states; i++) | |
| 676 states_seen[i] = 0; | |
| 677 } | 620 } |
| 621 for (i = 0; i < tnfa->num_states; i++) |
| 622 states_seen[i] = 0; |
| 623 } |
| 678 | 624 |
| 679 state = NULL; | 625 state = NULL; |
| 680 pos = pos_start; | 626 pos = pos_start; |
| 681 GET_NEXT_WCHAR(); | 627 GET_NEXT_WCHAR(); |
| 682 pos_start = pos; | 628 pos_start = pos; |
| 683 next_c_start = next_c; | 629 next_c_start = next_c; |
| 684 str_byte_start = str_byte; | 630 str_byte_start = str_byte; |
| 685 #ifdef TRE_MBSTATE | 631 #ifdef TRE_MBSTATE |
| 686 mbstate_start = mbstate; | 632 mbstate_start = mbstate; |
| 687 #endif /* TRE_MBSTATE */ | 633 #endif /* TRE_MBSTATE */ |
| 688 | 634 |
| 689 /* Handle initial states. */ | 635 /* Handle initial states. */ |
| 690 next_tags = NULL; | 636 next_tags = NULL; |
| 691 for (trans_i = tnfa->initial; trans_i->state; trans_i++) | 637 for (trans_i = tnfa->initial; trans_i->state; trans_i++) { |
| 692 { | 638 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) { |
| 693 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) | 639 continue; |
| 694 » { | |
| 695 » continue; | |
| 696 » } | |
| 697 if (state == NULL) | |
| 698 » { | |
| 699 » /* Start from this state. */ | |
| 700 » state = trans_i->state; | |
| 701 » next_tags = trans_i->tags; | |
| 702 » } | |
| 703 else | |
| 704 » { | |
| 705 » /* Backtrack to this state. */ | |
| 706 » BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, | |
| 707 » » » trans_i->state_id, next_c, tags, mbstate); | |
| 708 » { | |
| 709 » int *tmp = trans_i->tags; | |
| 710 » if (tmp) | |
| 711 » while (*tmp >= 0) | |
| 712 » » stack->item.tags[*tmp++] = pos; | |
| 713 » } | |
| 714 » } | |
| 715 } | 640 } |
| 641 if (state == NULL) { |
| 642 /* Start from this state. */ |
| 643 state = trans_i->state; |
| 644 next_tags = trans_i->tags; |
| 645 } else { |
| 646 /* Backtrack to this state. */ |
| 647 BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id, next_c, |
| 648 tags, mbstate); |
| 649 { |
| 650 int* tmp = trans_i->tags; |
| 651 if (tmp) |
| 652 while (*tmp >= 0) |
| 653 stack->item.tags[*tmp++] = pos; |
| 654 } |
| 655 } |
| 656 } |
| 716 | 657 |
| 717 if (next_tags) | 658 if (next_tags) |
| 718 for (; *next_tags >= 0; next_tags++) | 659 for (; *next_tags >= 0; next_tags++) |
| 719 tags[*next_tags] = pos; | 660 tags[*next_tags] = pos; |
| 720 | 661 |
| 721 | |
| 722 if (state == NULL) | 662 if (state == NULL) |
| 723 goto backtrack; | 663 goto backtrack; |
| 724 | 664 |
| 725 while (1) | 665 while (1) { |
| 726 { | 666 tre_tnfa_transition_t* next_state; |
| 727 tre_tnfa_transition_t *next_state; | 667 int empty_br_match; |
| 728 int empty_br_match; | |
| 729 | 668 |
| 730 if (state == tnfa->final) | 669 if (state == tnfa->final) { |
| 731 » { | 670 if (match_eo < pos || (match_eo == pos && match_tags && |
| 732 » if (match_eo < pos | 671 tre_tag_order(tnfa->num_tags, tnfa->tag_directions, |
| 733 » || (match_eo == pos | 672 tags, match_tags))) { |
| 734 » » && match_tags | 673 int i; |
| 735 » » && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, | 674 /* This match wins the previous match. */ |
| 736 » » » » tags, match_tags))) | 675 match_eo = pos; |
| 737 » { | 676 if (match_tags) |
| 738 » int i; | 677 for (i = 0; i < tnfa->num_tags; i++) |
| 739 » /* This match wins the previous match. */ | 678 match_tags[i] = tags[i]; |
| 740 » match_eo = pos; | 679 } |
| 741 » if (match_tags) | 680 /* Our TNFAs never have transitions leaving from the final state, |
| 742 » » for (i = 0; i < tnfa->num_tags; i++) | 681 so we jump right to backtracking. */ |
| 743 » » match_tags[i] = tags[i]; | 682 goto backtrack; |
| 744 » } | 683 } |
| 745 » /* Our TNFAs never have transitions leaving from the final state, | |
| 746 » so we jump right to backtracking. */ | |
| 747 » goto backtrack; | |
| 748 » } | |
| 749 | 684 |
| 750 /* Go to the next character in the input string. */ | 685 /* Go to the next character in the input string. */ |
| 751 empty_br_match = 0; | 686 empty_br_match = 0; |
| 752 trans_i = state; | 687 trans_i = state; |
| 753 if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) | 688 if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) { |
| 754 » { | 689 /* This is a back reference state. All transitions leaving from |
| 755 » /* This is a back reference state. All transitions leaving from | 690 this state have the same back reference "assertion". Instead |
| 756 » this state have the same back reference "assertion". Instead | 691 of reading the next character, we match the back reference. */ |
| 757 » of reading the next character, we match the back reference. */ | 692 int so, eo, bt = trans_i->u.backref; |
| 758 » int so, eo, bt = trans_i->u.backref; | 693 int bt_len; |
| 759 » int bt_len; | 694 int result; |
| 760 » int result; | |
| 761 | 695 |
| 762 » /* Get the substring we need to match against. Remember to | 696 /* Get the substring we need to match against. Remember to |
| 763 » turn off REG_NOSUB temporarily. */ | 697 turn off REG_NOSUB temporarily. */ |
| 764 » tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, | 698 tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, tnfa, tags, |
| 765 » » » tnfa, tags, pos); | 699 pos); |
| 766 » so = pmatch[bt].rm_so; | 700 so = pmatch[bt].rm_so; |
| 767 » eo = pmatch[bt].rm_eo; | 701 eo = pmatch[bt].rm_eo; |
| 768 » bt_len = eo - so; | 702 bt_len = eo - so; |
| 769 | 703 |
| 770 » result = strncmp((const char*)string + so, str_byte - 1, | 704 result = strncmp((const char*)string + so, str_byte - 1, (size_t)bt_len); |
| 771 » » » » (size_t)bt_len); | |
| 772 | 705 |
| 773 » if (result == 0) | 706 if (result == 0) { |
| 774 » { | 707 /* Back reference matched. Check for infinite loop. */ |
| 775 » /* Back reference matched. Check for infinite loop. */ | 708 if (bt_len == 0) |
| 776 » if (bt_len == 0) | 709 empty_br_match = 1; |
| 777 » » empty_br_match = 1; | 710 if (empty_br_match && states_seen[trans_i->state_id]) { |
| 778 » if (empty_br_match && states_seen[trans_i->state_id]) | 711 goto backtrack; |
| 779 » » { | 712 } |
| 780 » » goto backtrack; | |
| 781 » » } | |
| 782 | 713 |
| 783 » states_seen[trans_i->state_id] = empty_br_match; | 714 states_seen[trans_i->state_id] = empty_br_match; |
| 784 | 715 |
| 785 » /* Advance in input string and resync `prev_c', `next_c' | 716 /* Advance in input string and resync `prev_c', `next_c' |
| 786 » » and pos. */ | 717 and pos. */ |
| 787 » str_byte += bt_len - 1; | 718 str_byte += bt_len - 1; |
| 788 » pos += bt_len - 1; | 719 pos += bt_len - 1; |
| 789 » GET_NEXT_WCHAR(); | 720 GET_NEXT_WCHAR(); |
| 790 » } | 721 } else { |
| 791 » else | 722 goto backtrack; |
| 792 » { | 723 } |
| 793 » goto backtrack; | 724 } else { |
| 794 » } | 725 /* Check for end of string. */ |
| 795 » } | 726 if (next_c == L'\0') |
| 796 else | 727 goto backtrack; |
| 797 » { | |
| 798 » /* Check for end of string. */ | |
| 799 » if (next_c == L'\0') | |
| 800 » » goto backtrack; | |
| 801 | 728 |
| 802 » /* Read the next character. */ | 729 /* Read the next character. */ |
| 803 » GET_NEXT_WCHAR(); | 730 GET_NEXT_WCHAR(); |
| 804 » } | 731 } |
| 805 | 732 |
| 806 next_state = NULL; | 733 next_state = NULL; |
| 807 for (trans_i = state; trans_i->state; trans_i++) | 734 for (trans_i = state; trans_i->state; trans_i++) { |
| 808 » { | 735 if (trans_i->code_min <= (tre_cint_t)prev_c && |
| 809 » if (trans_i->code_min <= (tre_cint_t)prev_c | 736 trans_i->code_max >= (tre_cint_t)prev_c) { |
| 810 » && trans_i->code_max >= (tre_cint_t)prev_c) | 737 if (trans_i->assertions && |
| 811 » { | 738 (CHECK_ASSERTIONS(trans_i->assertions) || |
| 812 » if (trans_i->assertions | 739 CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) { |
| 813 » » && (CHECK_ASSERTIONS(trans_i->assertions) | 740 continue; |
| 814 » » || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) | 741 } |
| 815 » » { | |
| 816 » » continue; | |
| 817 » » } | |
| 818 | 742 |
| 819 » if (next_state == NULL) | 743 if (next_state == NULL) { |
| 820 » » { | 744 /* First matching transition. */ |
| 821 » » /* First matching transition. */ | 745 next_state = trans_i->state; |
| 822 » » next_state = trans_i->state; | 746 next_tags = trans_i->tags; |
| 823 » » next_tags = trans_i->tags; | 747 } else { |
| 824 » » } | 748 /* Second matching transition. We may need to backtrack here |
| 825 » else | 749 to take this transition instead of the first one, so we |
| 826 » » { | 750 push this transition in the backtracking stack so we can |
| 827 » » /* Second matching transition. We may need to backtrack here | 751 jump back here if needed. */ |
| 828 » » to take this transition instead of the first one, so we | 752 BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id, |
| 829 » » push this transition in the backtracking stack so we can | 753 next_c, tags, mbstate); |
| 830 » » jump back here if needed. */ | 754 { |
| 831 » » BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, | 755 int* tmp; |
| 832 » » » » trans_i->state_id, next_c, tags, mbstate); | 756 for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) |
| 833 » » { | 757 stack->item.tags[*tmp] = pos; |
| 834 » » int *tmp; | 758 } |
| 835 » » for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) | 759 #if 0 /* XXX - it's important not to look at all transitions here to keep \ |
| 836 » » stack->item.tags[*tmp] = pos; | 760 the stack small! */ |
| 837 » » } | |
| 838 #if 0 /* XXX - it's important not to look at all transitions here to keep | |
| 839 » the stack small! */ | |
| 840 break; | 761 break; |
| 841 #endif | 762 #endif |
| 842 » » } | 763 } |
| 843 » } | 764 } |
| 844 » } | 765 } |
| 845 | 766 |
| 846 if (next_state != NULL) | 767 if (next_state != NULL) { |
| 847 » { | 768 /* Matching transitions were found. Take the first one. */ |
| 848 » /* Matching transitions were found. Take the first one. */ | 769 state = next_state; |
| 849 » state = next_state; | |
| 850 | 770 |
| 851 » /* Update the tag values. */ | 771 /* Update the tag values. */ |
| 852 » if (next_tags) | 772 if (next_tags) |
| 853 » while (*next_tags >= 0) | 773 while (*next_tags >= 0) |
| 854 » tags[*next_tags++] = pos; | 774 tags[*next_tags++] = pos; |
| 855 » } | 775 } else { |
| 856 else | 776 backtrack: |
| 857 » { | 777 /* A matching transition was not found. Try to backtrack. */ |
| 858 » backtrack: | 778 if (stack->prev) { |
| 859 » /* A matching transition was not found. Try to backtrack. */ | 779 if (stack->item.state->assertions & ASSERT_BACKREF) { |
| 860 » if (stack->prev) | 780 states_seen[stack->item.state_id] = 0; |
| 861 » { | 781 } |
| 862 » if (stack->item.state->assertions & ASSERT_BACKREF) | |
| 863 » » { | |
| 864 » » states_seen[stack->item.state_id] = 0; | |
| 865 » » } | |
| 866 | 782 |
| 867 » BT_STACK_POP(); | 783 BT_STACK_POP(); |
| 868 » } | 784 } else if (match_eo < 0) { |
| 869 » else if (match_eo < 0) | 785 /* Try starting from a later position in the input string. */ |
| 870 » { | 786 /* Check for end of string. */ |
| 871 » /* Try starting from a later position in the input string. */ | 787 if (next_c == L'\0') { |
| 872 » /* Check for end of string. */ | 788 break; |
| 873 » if (next_c == L'\0') | 789 } |
| 874 » » { | 790 next_c = next_c_start; |
| 875 » » break; | |
| 876 » » } | |
| 877 » next_c = next_c_start; | |
| 878 #ifdef TRE_MBSTATE | 791 #ifdef TRE_MBSTATE |
| 879 » mbstate = mbstate_start; | 792 mbstate = mbstate_start; |
| 880 #endif /* TRE_MBSTATE */ | 793 #endif /* TRE_MBSTATE */ |
| 881 » str_byte = str_byte_start; | 794 str_byte = str_byte_start; |
| 882 » goto retry; | 795 goto retry; |
| 883 » } | 796 } else { |
| 884 » else | 797 break; |
| 885 » { | 798 } |
| 886 » break; | |
| 887 » } | |
| 888 » } | |
| 889 } | 799 } |
| 800 } |
| 890 | 801 |
| 891 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; | 802 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; |
| 892 *match_end_ofs = match_eo; | 803 *match_end_ofs = match_eo; |
| 893 | 804 |
| 894 error_exit: | 805 error_exit: |
| 895 tre_bt_mem_destroy(mem); | 806 tre_bt_mem_destroy(mem); |
| 896 #ifndef TRE_USE_ALLOCA | 807 #ifndef TRE_USE_ALLOCA |
| 897 if (tags) | 808 if (tags) |
| 898 xfree(tags); | 809 xfree(tags); |
| 899 if (pmatch) | 810 if (pmatch) |
| 900 xfree(pmatch); | 811 xfree(pmatch); |
| 901 if (states_seen) | 812 if (states_seen) |
| 902 xfree(states_seen); | 813 xfree(states_seen); |
| 903 #endif /* !TRE_USE_ALLOCA */ | 814 #endif /* !TRE_USE_ALLOCA */ |
| 904 | 815 |
| 905 return ret; | 816 return ret; |
| 906 } | 817 } |
| 907 | 818 |
| 908 /*********************************************************************** | 819 /*********************************************************************** |
| 909 from regexec.c | 820 from regexec.c |
| 910 ***********************************************************************/ | 821 ***********************************************************************/ |
| 911 | 822 |
| 912 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match | 823 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match |
| 913 endpoint values. */ | 824 endpoint values. */ |
| 914 static void | 825 static void tre_fill_pmatch(size_t nmatch, |
| 915 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, | 826 regmatch_t pmatch[], |
| 916 » » const tre_tnfa_t *tnfa, int *tags, int match_eo) | 827 int cflags, |
| 917 { | 828 const tre_tnfa_t* tnfa, |
| 918 tre_submatch_data_t *submatch_data; | 829 int* tags, |
| 830 int match_eo) { |
| 831 tre_submatch_data_t* submatch_data; |
| 919 unsigned int i, j; | 832 unsigned int i, j; |
| 920 int *parents; | 833 int* parents; |
| 921 | 834 |
| 922 i = 0; | 835 i = 0; |
| 923 if (match_eo >= 0 && !(cflags & REG_NOSUB)) | 836 if (match_eo >= 0 && !(cflags & REG_NOSUB)) { |
| 924 { | 837 /* Construct submatch offsets from the tags. */ |
| 925 /* Construct submatch offsets from the tags. */ | 838 submatch_data = tnfa->submatch_data; |
| 926 submatch_data = tnfa->submatch_data; | 839 while (i < tnfa->num_submatches && i < nmatch) { |
| 927 while (i < tnfa->num_submatches && i < nmatch) | 840 if (submatch_data[i].so_tag == tnfa->end_tag) |
| 928 » { | 841 pmatch[i].rm_so = match_eo; |
| 929 » if (submatch_data[i].so_tag == tnfa->end_tag) | 842 else |
| 930 » pmatch[i].rm_so = match_eo; | 843 pmatch[i].rm_so = tags[submatch_data[i].so_tag]; |
| 931 » else | |
| 932 » pmatch[i].rm_so = tags[submatch_data[i].so_tag]; | |
| 933 | 844 |
| 934 » if (submatch_data[i].eo_tag == tnfa->end_tag) | 845 if (submatch_data[i].eo_tag == tnfa->end_tag) |
| 935 » pmatch[i].rm_eo = match_eo; | 846 pmatch[i].rm_eo = match_eo; |
| 936 » else | 847 else |
| 937 » pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; | 848 pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; |
| 938 | 849 |
| 939 » /* If either of the endpoints were not used, this submatch | 850 /* If either of the endpoints were not used, this submatch |
| 940 » was not part of the match. */ | 851 was not part of the match. */ |
| 941 » if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) | 852 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) |
| 942 » pmatch[i].rm_so = pmatch[i].rm_eo = -1; | 853 pmatch[i].rm_so = pmatch[i].rm_eo = -1; |
| 943 | 854 |
| 944 i++; | |
| 945 } | |
| 946 /* Reset all submatches that are not within all of their parent | |
| 947 submatches. */ | |
| 948 i = 0; | |
| 949 while (i < tnfa->num_submatches && i < nmatch) | |
| 950 { | |
| 951 if (pmatch[i].rm_eo == -1) | |
| 952 assert(pmatch[i].rm_so == -1); | |
| 953 assert(pmatch[i].rm_so <= pmatch[i].rm_eo); | |
| 954 | |
| 955 parents = submatch_data[i].parents; | |
| 956 if (parents != NULL) | |
| 957 for (j = 0; parents[j] >= 0; j++) | |
| 958 { | |
| 959 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so | |
| 960 || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) | |
| 961 pmatch[i].rm_so = pmatch[i].rm_eo = -1; | |
| 962 } | |
| 963 i++; | |
| 964 } | |
| 965 } | |
| 966 | |
| 967 while (i < nmatch) | |
| 968 { | |
| 969 pmatch[i].rm_so = -1; | |
| 970 pmatch[i].rm_eo = -1; | |
| 971 i++; | 855 i++; |
| 972 } | 856 } |
| 857 /* Reset all submatches that are not within all of their parent |
| 858 submatches. */ |
| 859 i = 0; |
| 860 while (i < tnfa->num_submatches && i < nmatch) { |
| 861 if (pmatch[i].rm_eo == -1) |
| 862 assert(pmatch[i].rm_so == -1); |
| 863 assert(pmatch[i].rm_so <= pmatch[i].rm_eo); |
| 864 |
| 865 parents = submatch_data[i].parents; |
| 866 if (parents != NULL) |
| 867 for (j = 0; parents[j] >= 0; j++) { |
| 868 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so || |
| 869 pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) |
| 870 pmatch[i].rm_so = pmatch[i].rm_eo = -1; |
| 871 } |
| 872 i++; |
| 873 } |
| 874 } |
| 875 |
| 876 while (i < nmatch) { |
| 877 pmatch[i].rm_so = -1; |
| 878 pmatch[i].rm_eo = -1; |
| 879 i++; |
| 880 } |
| 973 } | 881 } |
| 974 | 882 |
| 975 | |
| 976 /* | 883 /* |
| 977 Wrapper functions for POSIX compatible regexp matching. | 884 Wrapper functions for POSIX compatible regexp matching. |
| 978 */ | 885 */ |
| 979 | 886 |
| 980 int | 887 int regexec(const regex_t* restrict preg, |
| 981 regexec(const regex_t *restrict preg, const char *restrict string, | 888 const char* restrict string, |
| 982 » size_t nmatch, regmatch_t pmatch[restrict], int eflags) | 889 size_t nmatch, |
| 983 { | 890 regmatch_t pmatch[restrict], |
| 984 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; | 891 int eflags) { |
| 892 tre_tnfa_t* tnfa = (void*)preg->TRE_REGEX_T_FIELD; |
| 985 reg_errcode_t status; | 893 reg_errcode_t status; |
| 986 int *tags = NULL, eo; | 894 int *tags = NULL, eo; |
| 987 if (tnfa->cflags & REG_NOSUB) nmatch = 0; | 895 if (tnfa->cflags & REG_NOSUB) |
| 988 if (tnfa->num_tags > 0 && nmatch > 0) | 896 nmatch = 0; |
| 989 { | 897 if (tnfa->num_tags > 0 && nmatch > 0) { |
| 990 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); | 898 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); |
| 991 if (tags == NULL) | 899 if (tags == NULL) |
| 992 » return REG_ESPACE; | 900 return REG_ESPACE; |
| 993 } | 901 } |
| 994 | 902 |
| 995 /* Dispatch to the appropriate matcher. */ | 903 /* Dispatch to the appropriate matcher. */ |
| 996 if (tnfa->have_backrefs) | 904 if (tnfa->have_backrefs) { |
| 997 { | 905 /* The regex has back references, use the backtracking matcher. */ |
| 998 /* The regex has back references, use the backtracking matcher. */ | 906 status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); |
| 999 status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); | 907 } else { |
| 1000 } | 908 /* Exact matching, no back references, use the parallel matcher. */ |
| 1001 else | 909 status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); |
| 1002 { | 910 } |
| 1003 /* Exact matching, no back references, use the parallel matcher. */ | |
| 1004 status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); | |
| 1005 } | |
| 1006 | 911 |
| 1007 if (status == REG_OK) | 912 if (status == REG_OK) |
| 1008 /* A match was found, so fill the submatch registers. */ | 913 /* A match was found, so fill the submatch registers. */ |
| 1009 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); | 914 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); |
| 1010 if (tags) | 915 if (tags) |
| 1011 xfree(tags); | 916 xfree(tags); |
| 1012 return status; | 917 return status; |
| 1013 } | 918 } |
| OLD | NEW |