Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(247)

Side by Side Diff: fusl/src/regex/regexec.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: headers too Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 regexec.c - TRE POSIX compatible matching functions (and more). 2 regexec.c - TRE POSIX compatible matching functions (and more).
3 3
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5 All rights reserved. 5 All rights reserved.
6 6
7 Redistribution and use in source and binary forms, with or without 7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions 8 modification, are permitted provided that the following conditions
9 are met: 9 are met:
10 10
(...skipping 23 matching lines...) Expand all
34 #include <wchar.h> 34 #include <wchar.h>
35 #include <wctype.h> 35 #include <wctype.h>
36 #include <limits.h> 36 #include <limits.h>
37 37
38 #include <regex.h> 38 #include <regex.h>
39 39
40 #include "tre.h" 40 #include "tre.h"
41 41
42 #include <assert.h> 42 #include <assert.h>
43 43
44 static void 44 static void tre_fill_pmatch(size_t nmatch,
45 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 45 regmatch_t pmatch[],
46 » » const tre_tnfa_t *tnfa, int *tags, int match_eo); 46 int cflags,
47 const tre_tnfa_t* tnfa,
48 int* tags,
49 int match_eo);
47 50
48 /*********************************************************************** 51 /***********************************************************************
49 from tre-match-utils.h 52 from tre-match-utils.h
50 ***********************************************************************/ 53 ***********************************************************************/
51 54
52 #define GET_NEXT_WCHAR() do { \ 55 #define GET_NEXT_WCHAR() \
53 prev_c = next_c; pos += pos_add_next; \ 56 do { \
54 if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \ 57 prev_c = next_c; \
55 if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \ 58 pos += pos_add_next; \
56 else pos_add_next++; \ 59 if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \
57 } \ 60 if (pos_add_next < 0) { \
58 str_byte += pos_add_next; \ 61 ret = REG_NOMATCH; \
62 goto error_exit; \
63 } else \
64 pos_add_next++; \
65 } \
66 str_byte += pos_add_next; \
59 } while (0) 67 } while (0)
60 68
61 #define IS_WORD_CHAR(c)» ((c) == L'_' || tre_isalnum(c)) 69 #define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
62 70
63 #define CHECK_ASSERTIONS(assertions)» » » » » \ 71 #define CHECK_ASSERTIONS(assertions) \
64 (((assertions & ASSERT_AT_BOL)» » » » » \ 72 (((assertions & ASSERT_AT_BOL) && (pos > 0 || reg_notbol) && \
65 && (pos > 0 || reg_notbol)» » » » » » \ 73 (prev_c != L'\n' || !reg_newline)) || \
66 && (prev_c != L'\n' || !reg_newline))» » » » \ 74 ((assertions & ASSERT_AT_EOL) && (next_c != L'\0' || reg_noteol) && \
67 || ((assertions & ASSERT_AT_EOL)» » » » » \ 75 (next_c != L'\n' || !reg_newline)) || \
68 && (next_c != L'\0' || reg_noteol)» » » » \ 76 ((assertions & ASSERT_AT_BOW) && \
69 && (next_c != L'\n' || !reg_newline))» » » » \ 77 (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) || \
70 || ((assertions & ASSERT_AT_BOW)» » » » » \ 78 ((assertions & ASSERT_AT_EOW) && \
71 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))» \ 79 (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) || \
72 || ((assertions & ASSERT_AT_EOW)» » » » » \ 80 ((assertions & ASSERT_AT_WB) && \
73 && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))» » \ 81 (pos != 0 && next_c != L'\0' && \
74 || ((assertions & ASSERT_AT_WB)» » » » » \ 82 IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) || \
75 && (pos != 0 && next_c != L'\0'» » » » » \ 83 ((assertions & ASSERT_AT_WB_NEG) && \
76 » && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))» » \ 84 (pos == 0 || next_c == L'\0' || \
77 || ((assertions & ASSERT_AT_WB_NEG)» » » » » \ 85 IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
78 && (pos == 0 || next_c == L'\0'» » » » » \
79 » || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
80 86
81 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ 87 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
82 (((trans_i->assertions & ASSERT_CHAR_CLASS) \ 88 (((trans_i->assertions & ASSERT_CHAR_CLASS) && \
83 && !(tnfa->cflags & REG_ICASE) \ 89 !(tnfa->cflags & REG_ICASE) && \
84 && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ 90 !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) || \
85 || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ 91 ((trans_i->assertions & ASSERT_CHAR_CLASS) && (tnfa->cflags & REG_ICASE) && \
86 && (tnfa->cflags & REG_ICASE) \ 92 !tre_isctype(tre_tolower((tre_cint_t)prev_c), trans_i->u.class) && \
87 && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ 93 !tre_isctype(tre_toupper((tre_cint_t)prev_c), trans_i->u.class)) || \
88 » && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ 94 ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) && \
89 || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ 95 tre_neg_char_classes_match(trans_i->neg_classes, (tre_cint_t)prev_c, \
90 && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ 96 tnfa->cflags & REG_ICASE)))
91 tnfa->cflags & REG_ICASE)))
92
93
94
95 97
96 /* Returns 1 if `t1' wins `t2', 0 otherwise. */ 98 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
97 static int 99 static int tre_tag_order(int num_tags,
98 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, 100 tre_tag_direction_t* tag_directions,
99 » int *t1, int *t2) 101 int* t1,
100 { 102 int* t2) {
101 int i; 103 int i;
102 for (i = 0; i < num_tags; i++) 104 for (i = 0; i < num_tags; i++) {
103 { 105 if (tag_directions[i] == TRE_TAG_MINIMIZE) {
104 if (tag_directions[i] == TRE_TAG_MINIMIZE) 106 if (t1[i] < t2[i])
105 » { 107 return 1;
106 » if (t1[i] < t2[i]) 108 if (t1[i] > t2[i])
107 » return 1; 109 return 0;
108 » if (t1[i] > t2[i]) 110 } else {
109 » return 0; 111 if (t1[i] > t2[i])
110 » } 112 return 1;
111 else 113 if (t1[i] < t2[i])
112 » { 114 return 0;
113 » if (t1[i] > t2[i])
114 » return 1;
115 » if (t1[i] < t2[i])
116 » return 0;
117 » }
118 } 115 }
116 }
119 /* assert(0);*/ 117 /* assert(0);*/
120 return 0; 118 return 0;
121 } 119 }
122 120
123 static int 121 static int tre_neg_char_classes_match(tre_ctype_t* classes,
124 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) 122 tre_cint_t wc,
125 { 123 int icase) {
126 while (*classes != (tre_ctype_t)0) 124 while (*classes != (tre_ctype_t)0)
127 if ((!icase && tre_isctype(wc, *classes)) 125 if ((!icase && tre_isctype(wc, *classes)) ||
128 » || (icase && (tre_isctype(tre_toupper(wc), *classes) 126 (icase && (tre_isctype(tre_toupper(wc), *classes) ||
129 » » || tre_isctype(tre_tolower(wc), *classes)))) 127 tre_isctype(tre_tolower(wc), *classes))))
130 return 1; /* Match. */ 128 return 1; /* Match. */
131 else 129 else
132 classes++; 130 classes++;
133 return 0; /* No match. */ 131 return 0; /* No match. */
134 } 132 }
135 133
136
137 /*********************************************************************** 134 /***********************************************************************
138 from tre-match-parallel.c 135 from tre-match-parallel.c
139 ***********************************************************************/ 136 ***********************************************************************/
140 137
141 /* 138 /*
142 This algorithm searches for matches basically by reading characters 139 This algorithm searches for matches basically by reading characters
143 in the searched string one by one, starting at the beginning. All 140 in the searched string one by one, starting at the beginning. All
144 matching paths in the TNFA are traversed in parallel. When two or 141 matching paths in the TNFA are traversed in parallel. When two or
145 more paths reach the same state, exactly one is chosen according to 142 more paths reach the same state, exactly one is chosen according to
146 tag ordering rules; if returning submatches is not required it does 143 tag ordering rules; if returning submatches is not required it does
147 not matter which path is chosen. 144 not matter which path is chosen.
148 145
149 The worst case time required for finding the leftmost and longest 146 The worst case time required for finding the leftmost and longest
150 match, or determining that there is no match, is always linearly 147 match, or determining that there is no match, is always linearly
151 dependent on the length of the text being searched. 148 dependent on the length of the text being searched.
152 149
153 This algorithm cannot handle TNFAs with back referencing nodes. 150 This algorithm cannot handle TNFAs with back referencing nodes.
154 See `tre-match-backtrack.c'. 151 See `tre-match-backtrack.c'.
155 */ 152 */
156 153
157 typedef struct { 154 typedef struct {
158 tre_tnfa_transition_t *state; 155 tre_tnfa_transition_t* state;
159 int *tags; 156 int* tags;
160 } tre_tnfa_reach_t; 157 } tre_tnfa_reach_t;
161 158
162 typedef struct { 159 typedef struct {
163 int pos; 160 int pos;
164 int **tags; 161 int** tags;
165 } tre_reach_pos_t; 162 } tre_reach_pos_t;
166 163
167 164 static reg_errcode_t tre_tnfa_run_parallel(const tre_tnfa_t* tnfa,
168 static reg_errcode_t 165 const void* string,
169 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, 166 int* match_tags,
170 » » int *match_tags, int eflags, 167 int eflags,
171 » » int *match_end_ofs) 168 int* match_end_ofs) {
172 {
173 /* State variables required by GET_NEXT_WCHAR. */ 169 /* State variables required by GET_NEXT_WCHAR. */
174 tre_char_t prev_c = 0, next_c = 0; 170 tre_char_t prev_c = 0, next_c = 0;
175 const char *str_byte = string; 171 const char* str_byte = string;
176 int pos = -1; 172 int pos = -1;
177 int pos_add_next = 1; 173 int pos_add_next = 1;
178 #ifdef TRE_MBSTATE 174 #ifdef TRE_MBSTATE
179 mbstate_t mbstate; 175 mbstate_t mbstate;
180 #endif /* TRE_MBSTATE */ 176 #endif /* TRE_MBSTATE */
181 int reg_notbol = eflags & REG_NOTBOL; 177 int reg_notbol = eflags & REG_NOTBOL;
182 int reg_noteol = eflags & REG_NOTEOL; 178 int reg_noteol = eflags & REG_NOTEOL;
183 int reg_newline = tnfa->cflags & REG_NEWLINE; 179 int reg_newline = tnfa->cflags & REG_NEWLINE;
184 reg_errcode_t ret; 180 reg_errcode_t ret;
185 181
186 char *buf; 182 char* buf;
187 tre_tnfa_transition_t *trans_i; 183 tre_tnfa_transition_t* trans_i;
188 tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; 184 tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
189 tre_reach_pos_t *reach_pos; 185 tre_reach_pos_t* reach_pos;
190 int *tag_i; 186 int* tag_i;
191 int num_tags, i; 187 int num_tags, i;
192 188
193 int match_eo = -1;» /* end offset of match (-1 if no match found yet) */ 189 int match_eo = -1; /* end offset of match (-1 if no match found yet) */
194 int new_match = 0; 190 int new_match = 0;
195 int *tmp_tags = NULL; 191 int* tmp_tags = NULL;
196 int *tmp_iptr; 192 int* tmp_iptr;
197 193
198 #ifdef TRE_MBSTATE 194 #ifdef TRE_MBSTATE
199 memset(&mbstate, '\0', sizeof(mbstate)); 195 memset(&mbstate, '\0', sizeof(mbstate));
200 #endif /* TRE_MBSTATE */ 196 #endif /* TRE_MBSTATE */
201 197
202 if (!match_tags) 198 if (!match_tags)
203 num_tags = 0; 199 num_tags = 0;
204 else 200 else
205 num_tags = tnfa->num_tags; 201 num_tags = tnfa->num_tags;
206 202
207 /* Allocate memory for temporary data required for matching. This needs to 203 /* Allocate memory for temporary data required for matching. This needs to
208 be done for every matching operation to be thread safe. This allocates 204 be done for every matching operation to be thread safe. This allocates
209 everything in a single large block from the stack frame using alloca() 205 everything in a single large block from the stack frame using alloca()
210 or with malloc() if alloca is unavailable. */ 206 or with malloc() if alloca is unavailable. */
211 { 207 {
212 int tbytes, rbytes, pbytes, xbytes, total_bytes; 208 int tbytes, rbytes, pbytes, xbytes, total_bytes;
213 char *tmp_buf; 209 char* tmp_buf;
214 /* Compute the length of the block we need. */ 210 /* Compute the length of the block we need. */
215 tbytes = sizeof(*tmp_tags) * num_tags; 211 tbytes = sizeof(*tmp_tags) * num_tags;
216 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); 212 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
217 pbytes = sizeof(*reach_pos) * tnfa->num_states; 213 pbytes = sizeof(*reach_pos) * tnfa->num_states;
218 xbytes = sizeof(int) * num_tags; 214 xbytes = sizeof(int) * num_tags;
219 total_bytes = 215 total_bytes = (sizeof(long) - 1) * 4 /* for alignment paddings */
220 (sizeof(long) - 1) * 4 /* for alignment paddings */ 216 + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
221 + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
222 217
223 /* Allocate the memory. */ 218 /* Allocate the memory. */
224 buf = xmalloc((unsigned)total_bytes); 219 buf = xmalloc((unsigned)total_bytes);
225 if (buf == NULL) 220 if (buf == NULL)
226 return REG_ESPACE; 221 return REG_ESPACE;
227 memset(buf, 0, (size_t)total_bytes); 222 memset(buf, 0, (size_t)total_bytes);
228 223
229 /* Get the various pointers within tmp_buf (properly aligned). */ 224 /* Get the various pointers within tmp_buf (properly aligned). */
230 tmp_tags = (void *)buf; 225 tmp_tags = (void*)buf;
231 tmp_buf = buf + tbytes; 226 tmp_buf = buf + tbytes;
232 tmp_buf += ALIGN(tmp_buf, long); 227 tmp_buf += ALIGN(tmp_buf, long);
233 reach_next = (void *)tmp_buf; 228 reach_next = (void*)tmp_buf;
234 tmp_buf += rbytes; 229 tmp_buf += rbytes;
235 tmp_buf += ALIGN(tmp_buf, long); 230 tmp_buf += ALIGN(tmp_buf, long);
236 reach = (void *)tmp_buf; 231 reach = (void*)tmp_buf;
237 tmp_buf += rbytes; 232 tmp_buf += rbytes;
238 tmp_buf += ALIGN(tmp_buf, long); 233 tmp_buf += ALIGN(tmp_buf, long);
239 reach_pos = (void *)tmp_buf; 234 reach_pos = (void*)tmp_buf;
240 tmp_buf += pbytes; 235 tmp_buf += pbytes;
241 tmp_buf += ALIGN(tmp_buf, long); 236 tmp_buf += ALIGN(tmp_buf, long);
242 for (i = 0; i < tnfa->num_states; i++) 237 for (i = 0; i < tnfa->num_states; i++) {
243 { 238 reach[i].tags = (void*)tmp_buf;
244 » reach[i].tags = (void *)tmp_buf; 239 tmp_buf += xbytes;
245 » tmp_buf += xbytes; 240 reach_next[i].tags = (void*)tmp_buf;
246 » reach_next[i].tags = (void *)tmp_buf; 241 tmp_buf += xbytes;
247 » tmp_buf += xbytes; 242 }
248 }
249 } 243 }
250 244
251 for (i = 0; i < tnfa->num_states; i++) 245 for (i = 0; i < tnfa->num_states; i++)
252 reach_pos[i].pos = -1; 246 reach_pos[i].pos = -1;
253 247
254 GET_NEXT_WCHAR(); 248 GET_NEXT_WCHAR();
255 pos = 0; 249 pos = 0;
256 250
257 reach_next_i = reach_next; 251 reach_next_i = reach_next;
258 while (1) 252 while (1) {
259 { 253 /* If no match found yet, add the initial states to `reach_next'. */
260 /* If no match found yet, add the initial states to `reach_next'. */ 254 if (match_eo < 0) {
261 if (match_eo < 0) 255 trans_i = tnfa->initial;
262 » { 256 while (trans_i->state != NULL) {
263 » trans_i = tnfa->initial; 257 if (reach_pos[trans_i->state_id].pos < pos) {
264 » while (trans_i->state != NULL) 258 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) {
265 » { 259 trans_i++;
266 » if (reach_pos[trans_i->state_id].pos < pos) 260 continue;
267 » » { 261 }
268 » » if (trans_i->assertions
269 » » && CHECK_ASSERTIONS(trans_i->assertions))
270 » » {
271 » » trans_i++;
272 » » continue;
273 » » }
274 262
275 » » reach_next_i->state = trans_i->state; 263 reach_next_i->state = trans_i->state;
276 » » for (i = 0; i < num_tags; i++) 264 for (i = 0; i < num_tags; i++)
277 » » reach_next_i->tags[i] = -1; 265 reach_next_i->tags[i] = -1;
278 » » tag_i = trans_i->tags; 266 tag_i = trans_i->tags;
279 » » if (tag_i) 267 if (tag_i)
280 » » while (*tag_i >= 0) 268 while (*tag_i >= 0) {
281 » » { 269 if (*tag_i < num_tags)
282 » » » if (*tag_i < num_tags) 270 reach_next_i->tags[*tag_i] = pos;
283 » » » reach_next_i->tags[*tag_i] = pos; 271 tag_i++;
284 » » » tag_i++; 272 }
285 » » } 273 if (reach_next_i->state == tnfa->final) {
286 » » if (reach_next_i->state == tnfa->final) 274 match_eo = pos;
287 » » { 275 new_match = 1;
288 » » match_eo = pos; 276 for (i = 0; i < num_tags; i++)
289 » » new_match = 1; 277 match_tags[i] = reach_next_i->tags[i];
290 » » for (i = 0; i < num_tags; i++) 278 }
291 » » » match_tags[i] = reach_next_i->tags[i]; 279 reach_pos[trans_i->state_id].pos = pos;
292 » » } 280 reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
293 » » reach_pos[trans_i->state_id].pos = pos; 281 reach_next_i++;
294 » » reach_pos[trans_i->state_id].tags = &reach_next_i->tags; 282 }
295 » » reach_next_i++; 283 trans_i++;
296 » » } 284 }
297 » trans_i++; 285 reach_next_i->state = NULL;
298 » } 286 } else {
299 » reach_next_i->state = NULL; 287 if (num_tags == 0 || reach_next_i == reach_next)
300 » } 288 /* We have found a match. */
301 else 289 break;
302 » { 290 }
303 » if (num_tags == 0 || reach_next_i == reach_next)
304 » /* We have found a match. */
305 » break;
306 » }
307 291
308 /* Check for end of string. */ 292 /* Check for end of string. */
309 if (!next_c) break; 293 if (!next_c)
294 break;
310 295
311 GET_NEXT_WCHAR(); 296 GET_NEXT_WCHAR();
297
298 /* Swap `reach' and `reach_next'. */
299 reach_i = reach;
300 reach = reach_next;
301 reach_next = reach_i;
302
303 /* For each state in `reach', weed out states that don't fulfill the
304 minimal matching conditions. */
305 if (tnfa->num_minimals && new_match) {
306 new_match = 0;
307 reach_next_i = reach_next;
308 for (reach_i = reach; reach_i->state; reach_i++) {
309 int skip = 0;
310 for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) {
311 int end = tnfa->minimal_tags[i];
312 int start = tnfa->minimal_tags[i + 1];
313 if (end >= num_tags) {
314 skip = 1;
315 break;
316 } else if (reach_i->tags[start] == match_tags[start] &&
317 reach_i->tags[end] < match_tags[end]) {
318 skip = 1;
319 break;
320 }
321 }
322 if (!skip) {
323 reach_next_i->state = reach_i->state;
324 tmp_iptr = reach_next_i->tags;
325 reach_next_i->tags = reach_i->tags;
326 reach_i->tags = tmp_iptr;
327 reach_next_i++;
328 }
329 }
330 reach_next_i->state = NULL;
312 331
313 /* Swap `reach' and `reach_next'. */ 332 /* Swap `reach' and `reach_next'. */
314 reach_i = reach; 333 reach_i = reach;
315 reach = reach_next; 334 reach = reach_next;
316 reach_next = reach_i; 335 reach_next = reach_i;
336 }
317 337
318 /* For each state in `reach', weed out states that don't fulfill the 338 /* For each state in `reach' see if there is a transition leaving with
319 » minimal matching conditions. */ 339 the current input symbol to a state not yet in `reach_next', and
320 if (tnfa->num_minimals && new_match) 340 add the destination states to `reach_next'. */
321 » { 341 reach_next_i = reach_next;
322 » new_match = 0; 342 for (reach_i = reach; reach_i->state; reach_i++) {
323 » reach_next_i = reach_next; 343 for (trans_i = reach_i->state; trans_i->state; trans_i++) {
324 » for (reach_i = reach; reach_i->state; reach_i++) 344 /* Does this transition match the input symbol? */
325 » { 345 if (trans_i->code_min <= (tre_cint_t)prev_c &&
326 » int skip = 0; 346 trans_i->code_max >= (tre_cint_t)prev_c) {
327 » for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) 347 if (trans_i->assertions &&
328 » » { 348 (CHECK_ASSERTIONS(trans_i->assertions) ||
329 » » int end = tnfa->minimal_tags[i]; 349 CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) {
330 » » int start = tnfa->minimal_tags[i + 1]; 350 continue;
331 » » if (end >= num_tags) 351 }
332 » » {
333 » » skip = 1;
334 » » break;
335 » » }
336 » » else if (reach_i->tags[start] == match_tags[start]
337 » » » && reach_i->tags[end] < match_tags[end])
338 » » {
339 » » skip = 1;
340 » » break;
341 » » }
342 » » }
343 » if (!skip)
344 » » {
345 » » reach_next_i->state = reach_i->state;
346 » » tmp_iptr = reach_next_i->tags;
347 » » reach_next_i->tags = reach_i->tags;
348 » » reach_i->tags = tmp_iptr;
349 » » reach_next_i++;
350 » » }
351 » }
352 » reach_next_i->state = NULL;
353 352
354 » /* Swap `reach' and `reach_next'. */ 353 /* Compute the tags after this transition. */
355 » reach_i = reach; 354 for (i = 0; i < num_tags; i++)
356 » reach = reach_next; 355 tmp_tags[i] = reach_i->tags[i];
357 » reach_next = reach_i; 356 tag_i = trans_i->tags;
358 » } 357 if (tag_i != NULL)
358 while (*tag_i >= 0) {
359 if (*tag_i < num_tags)
360 tmp_tags[*tag_i] = pos;
361 tag_i++;
362 }
359 363
360 /* For each state in `reach' see if there is a transition leaving with 364 if (reach_pos[trans_i->state_id].pos < pos) {
361 » the current input symbol to a state not yet in `reach_next', and 365 /* Found an unvisited node. */
362 » add the destination states to `reach_next'. */ 366 reach_next_i->state = trans_i->state;
363 reach_next_i = reach_next; 367 tmp_iptr = reach_next_i->tags;
364 for (reach_i = reach; reach_i->state; reach_i++) 368 reach_next_i->tags = tmp_tags;
365 » { 369 tmp_tags = tmp_iptr;
366 » for (trans_i = reach_i->state; trans_i->state; trans_i++) 370 reach_pos[trans_i->state_id].pos = pos;
367 » { 371 reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
368 » /* Does this transition match the input symbol? */
369 » if (trans_i->code_min <= (tre_cint_t)prev_c &&
370 » » trans_i->code_max >= (tre_cint_t)prev_c)
371 » » {
372 » » if (trans_i->assertions
373 » » && (CHECK_ASSERTIONS(trans_i->assertions)
374 » » » || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
375 » » {
376 » » continue;
377 » » }
378 372
379 » » /* Compute the tags after this transition. */ 373 if (reach_next_i->state == tnfa->final &&
380 » » for (i = 0; i < num_tags; i++) 374 (match_eo == -1 ||
381 » » tmp_tags[i] = reach_i->tags[i]; 375 (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0]))) {
382 » » tag_i = trans_i->tags; 376 match_eo = pos;
383 » » if (tag_i != NULL) 377 new_match = 1;
384 » » while (*tag_i >= 0) 378 for (i = 0; i < num_tags; i++)
385 » » { 379 match_tags[i] = reach_next_i->tags[i];
386 » » » if (*tag_i < num_tags) 380 }
387 » » » tmp_tags[*tag_i] = pos; 381 reach_next_i++;
388 » » » tag_i++;
389 » » }
390 382
391 » » if (reach_pos[trans_i->state_id].pos < pos) 383 } else {
392 » » { 384 assert(reach_pos[trans_i->state_id].pos == pos);
393 » » /* Found an unvisited node. */ 385 /* Another path has also reached this state. We choose
394 » » reach_next_i->state = trans_i->state; 386 the winner by examining the tag values for both
395 » » tmp_iptr = reach_next_i->tags; 387 paths. */
396 » » reach_next_i->tags = tmp_tags; 388 if (tre_tag_order(num_tags, tnfa->tag_directions, tmp_tags,
397 » » tmp_tags = tmp_iptr; 389 *reach_pos[trans_i->state_id].tags)) {
398 » » reach_pos[trans_i->state_id].pos = pos; 390 /* The new path wins. */
399 » » reach_pos[trans_i->state_id].tags = &reach_next_i->tags; 391 tmp_iptr = *reach_pos[trans_i->state_id].tags;
400 392 *reach_pos[trans_i->state_id].tags = tmp_tags;
401 » » if (reach_next_i->state == tnfa->final 393 if (trans_i->state == tnfa->final) {
402 » » » && (match_eo == -1 394 match_eo = pos;
403 » » » || (num_tags > 0 395 new_match = 1;
404 » » » » && reach_next_i->tags[0] <= match_tags[0]))) 396 for (i = 0; i < num_tags; i++)
405 » » » { 397 match_tags[i] = tmp_tags[i];
406 » » » match_eo = pos; 398 }
407 » » » new_match = 1; 399 tmp_tags = tmp_iptr;
408 » » » for (i = 0; i < num_tags; i++) 400 }
409 » » » match_tags[i] = reach_next_i->tags[i]; 401 }
410 » » » } 402 }
411 » » reach_next_i++; 403 }
412
413 » » }
414 » » else
415 » » {
416 » » assert(reach_pos[trans_i->state_id].pos == pos);
417 » » /* Another path has also reached this state. We choose
418 » » » the winner by examining the tag values for both
419 » » » paths. */
420 » » if (tre_tag_order(num_tags, tnfa->tag_directions,
421 » » » » » tmp_tags,
422 » » » » » *reach_pos[trans_i->state_id].tags))
423 » » » {
424 » » » /* The new path wins. */
425 » » » tmp_iptr = *reach_pos[trans_i->state_id].tags;
426 » » » *reach_pos[trans_i->state_id].tags = tmp_tags;
427 » » » if (trans_i->state == tnfa->final)
428 » » » {
429 » » » match_eo = pos;
430 » » » new_match = 1;
431 » » » for (i = 0; i < num_tags; i++)
432 » » » » match_tags[i] = tmp_tags[i];
433 » » » }
434 » » » tmp_tags = tmp_iptr;
435 » » » }
436 » » }
437 » » }
438 » }
439 » }
440 reach_next_i->state = NULL;
441 } 404 }
405 reach_next_i->state = NULL;
406 }
442 407
443 *match_end_ofs = match_eo; 408 *match_end_ofs = match_eo;
444 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; 409 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
445 error_exit: 410 error_exit:
446 xfree(buf); 411 xfree(buf);
447 return ret; 412 return ret;
448 } 413 }
449 414
450
451
452 /*********************************************************************** 415 /***********************************************************************
453 from tre-match-backtrack.c 416 from tre-match-backtrack.c
454 ***********************************************************************/ 417 ***********************************************************************/
455 418
456 /* 419 /*
457 This matcher is for regexps that use back referencing. Regexp matching 420 This matcher is for regexps that use back referencing. Regexp matching
458 with back referencing is an NP-complete problem on the number of back 421 with back referencing is an NP-complete problem on the number of back
459 references. The easiest way to match them is to use a backtracking 422 references. The easiest way to match them is to use a backtracking
460 routine which basically goes through all possible paths in the TNFA 423 routine which basically goes through all possible paths in the TNFA
461 and chooses the one which results in the best (leftmost and longest) 424 and chooses the one which results in the best (leftmost and longest)
462 match. This can be spectacularly expensive and may run out of stack 425 match. This can be spectacularly expensive and may run out of stack
463 space, but there really is no better known generic algorithm. Quoting 426 space, but there really is no better known generic algorithm. Quoting
464 Henry Spencer from comp.compilers: 427 Henry Spencer from comp.compilers:
465 <URL: http://compilers.iecc.com/comparch/article/93-03-102> 428 <URL: http://compilers.iecc.com/comparch/article/93-03-102>
466 429
467 POSIX.2 REs require longest match, which is really exciting to 430 POSIX.2 REs require longest match, which is really exciting to
468 implement since the obsolete ("basic") variant also includes 431 implement since the obsolete ("basic") variant also includes
469 \<digit>. I haven't found a better way of tackling this than doing 432 \<digit>. I haven't found a better way of tackling this than doing
470 a preliminary match using a DFA (or simulation) on a modified RE 433 a preliminary match using a DFA (or simulation) on a modified RE
471 that just replicates subREs for \<digit>, and then doing a 434 that just replicates subREs for \<digit>, and then doing a
472 backtracking match to determine whether the subRE matches were 435 backtracking match to determine whether the subRE matches were
473 right. This can be rather slow, but I console myself with the 436 right. This can be rather slow, but I console myself with the
474 thought that people who use \<digit> deserve very slow execution. 437 thought that people who use \<digit> deserve very slow execution.
475 (Pun unintentional but very appropriate.) 438 (Pun unintentional but very appropriate.)
476 439
477 */ 440 */
478 441
479 typedef struct { 442 typedef struct {
480 int pos; 443 int pos;
481 const char *str_byte; 444 const char* str_byte;
482 tre_tnfa_transition_t *state; 445 tre_tnfa_transition_t* state;
483 int state_id; 446 int state_id;
484 int next_c; 447 int next_c;
485 int *tags; 448 int* tags;
486 #ifdef TRE_MBSTATE 449 #ifdef TRE_MBSTATE
487 mbstate_t mbstate; 450 mbstate_t mbstate;
488 #endif /* TRE_MBSTATE */ 451 #endif /* TRE_MBSTATE */
489 } tre_backtrack_item_t; 452 } tre_backtrack_item_t;
490 453
491 typedef struct tre_backtrack_struct { 454 typedef struct tre_backtrack_struct {
492 tre_backtrack_item_t item; 455 tre_backtrack_item_t item;
493 struct tre_backtrack_struct *prev; 456 struct tre_backtrack_struct* prev;
494 struct tre_backtrack_struct *next; 457 struct tre_backtrack_struct* next;
495 } *tre_backtrack_t; 458 } * tre_backtrack_t;
496 459
497 #ifdef TRE_MBSTATE 460 #ifdef TRE_MBSTATE
498 #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) 461 #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
499 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate 462 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
500 #else /* !TRE_MBSTATE */ 463 #else /* !TRE_MBSTATE */
501 #define BT_STACK_MBSTATE_IN 464 #define BT_STACK_MBSTATE_IN
502 #define BT_STACK_MBSTATE_OUT 465 #define BT_STACK_MBSTATE_OUT
503 #endif /* !TRE_MBSTATE */ 466 #endif /* !TRE_MBSTATE */
504 467
505 #define tre_bt_mem_new» » tre_mem_new 468 #define tre_bt_mem_new tre_mem_new
506 #define tre_bt_mem_alloc» tre_mem_alloc 469 #define tre_bt_mem_alloc tre_mem_alloc
507 #define tre_bt_mem_destroy» tre_mem_destroy 470 #define tre_bt_mem_destroy tre_mem_destroy
508 471
472 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, \
473 _tags, _mbstate) \
474 do { \
475 int i; \
476 if (!stack->next) { \
477 tre_backtrack_t s; \
478 s = tre_bt_mem_alloc(mem, sizeof(*s)); \
479 if (!s) { \
480 tre_bt_mem_destroy(mem); \
481 if (tags) \
482 xfree(tags); \
483 if (pmatch) \
484 xfree(pmatch); \
485 if (states_seen) \
486 xfree(states_seen); \
487 return REG_ESPACE; \
488 } \
489 s->prev = stack; \
490 s->next = NULL; \
491 s->item.tags = tre_bt_mem_alloc(mem, sizeof(*tags) * tnfa->num_tags); \
492 if (!s->item.tags) { \
493 tre_bt_mem_destroy(mem); \
494 if (tags) \
495 xfree(tags); \
496 if (pmatch) \
497 xfree(pmatch); \
498 if (states_seen) \
499 xfree(states_seen); \
500 return REG_ESPACE; \
501 } \
502 stack->next = s; \
503 stack = s; \
504 } else \
505 stack = stack->next; \
506 stack->item.pos = (_pos); \
507 stack->item.str_byte = (_str_byte); \
508 stack->item.state = (_state); \
509 stack->item.state_id = (_state_id); \
510 stack->item.next_c = (_next_c); \
511 for (i = 0; i < tnfa->num_tags; i++) \
512 stack->item.tags[i] = (_tags)[i]; \
513 BT_STACK_MBSTATE_IN; \
514 } while (0)
509 515
510 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _t ags, _mbstate) \ 516 #define BT_STACK_POP() \
511 do» » » » » » » » » \ 517 do { \
512 {» » » » » » » » » \ 518 int i; \
513 int i;» » » » » » » » \ 519 assert(stack->prev); \
514 if (!stack->next)»» » » » » » \ 520 pos = stack->item.pos; \
515 » {» » » » » » » » \ 521 str_byte = stack->item.str_byte; \
516 » tre_backtrack_t s;» » » » » » \ 522 state = stack->item.state; \
517 » s = tre_bt_mem_alloc(mem, sizeof(*s));» » » \ 523 next_c = stack->item.next_c; \
518 » if (!s)» » » » » » » \ 524 for (i = 0; i < tnfa->num_tags; i++) \
519 » {» » » » » » » » \ 525 tags[i] = stack->item.tags[i]; \
520 » tre_bt_mem_destroy(mem);» » » » » \ 526 BT_STACK_MBSTATE_OUT; \
521 » if (tags)»» » » » » » \ 527 stack = stack->prev; \
522 » » xfree(tags);» » » » » » \ 528 } while (0)
523 » if (pmatch)» » » » » » \
524 » » xfree(pmatch);» » » » » » \
525 » if (states_seen)» » » » » » \
526 » » xfree(states_seen);» » » » » \
527 » return REG_ESPACE;» » » » » \
528 » }» » » » » » » » \
529 » s->prev = stack;» » » » » » \
530 » s->next = NULL;» » » » » » \
531 » s->item.tags = tre_bt_mem_alloc(mem,» » » » \
532 » » » » » sizeof(*tags) * tnfa->num_tags); \
533 » if (!s->item.tags)» » » » » » \
534 » {» » » » » » » » \
535 » tre_bt_mem_destroy(mem);» » » » » \
536 » if (tags)»» » » » » » \
537 » » xfree(tags);» » » » » » \
538 » if (pmatch)» » » » » » \
539 » » xfree(pmatch);» » » » » » \
540 » if (states_seen)» » » » » » \
541 » » xfree(states_seen);» » » » » \
542 » return REG_ESPACE;» » » » » \
543 » }» » » » » » » » \
544 » stack->next = s;» » » » » » \
545 » stack = s;» » » » » » » \
546 » }» » » » » » » » \
547 else» » » » » » » » \
548 » stack = stack->next;» » » » » » \
549 stack->item.pos = (_pos);»» » » » » \
550 stack->item.str_byte = (_str_byte);» » » » \
551 stack->item.state = (_state);» » » » » \
552 stack->item.state_id = (_state_id);» » » » \
553 stack->item.next_c = (_next_c);» » » » » \
554 for (i = 0; i < tnfa->num_tags; i++)» » » » \
555 » stack->item.tags[i] = (_tags)[i];» » » » \
556 BT_STACK_MBSTATE_IN;» » » » » » \
557 }» » » » » » » » » \
558 while (0)
559
560 #define BT_STACK_POP()» » » » » » » \
561 do» » » » » » » » » \
562 {» » » » » » » » » \
563 int i;» » » » » » » » \
564 assert(stack->prev);» » » » » » \
565 pos = stack->item.pos;» » » » » » \
566 str_byte = stack->item.str_byte;» » » » » \
567 state = stack->item.state;» » » » » \
568 next_c = stack->item.next_c;» » » » » \
569 for (i = 0; i < tnfa->num_tags; i++)» » » » \
570 » tags[i] = stack->item.tags[i];» » » » » \
571 BT_STACK_MBSTATE_OUT;» » » » » » \
572 stack = stack->prev;» » » » » » \
573 }» » » » » » » » » \
574 while (0)
575 529
576 #undef MIN 530 #undef MIN
577 #define MIN(a, b) ((a) <= (b) ? (a) : (b)) 531 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
578 532
579 static reg_errcode_t 533 static reg_errcode_t tre_tnfa_run_backtrack(const tre_tnfa_t* tnfa,
580 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, 534 const void* string,
581 » » int *match_tags, int eflags, int *match_end_ofs) 535 int* match_tags,
582 { 536 int eflags,
537 int* match_end_ofs) {
583 /* State variables required by GET_NEXT_WCHAR. */ 538 /* State variables required by GET_NEXT_WCHAR. */
584 tre_char_t prev_c = 0, next_c = 0; 539 tre_char_t prev_c = 0, next_c = 0;
585 const char *str_byte = string; 540 const char* str_byte = string;
586 int pos = 0; 541 int pos = 0;
587 int pos_add_next = 1; 542 int pos_add_next = 1;
588 #ifdef TRE_MBSTATE 543 #ifdef TRE_MBSTATE
589 mbstate_t mbstate; 544 mbstate_t mbstate;
590 #endif /* TRE_MBSTATE */ 545 #endif /* TRE_MBSTATE */
591 int reg_notbol = eflags & REG_NOTBOL; 546 int reg_notbol = eflags & REG_NOTBOL;
592 int reg_noteol = eflags & REG_NOTEOL; 547 int reg_noteol = eflags & REG_NOTEOL;
593 int reg_newline = tnfa->cflags & REG_NEWLINE; 548 int reg_newline = tnfa->cflags & REG_NEWLINE;
594 549
595 /* These are used to remember the necessary values of the above 550 /* These are used to remember the necessary values of the above
596 variables to return to the position where the current search 551 variables to return to the position where the current search
597 started from. */ 552 started from. */
598 int next_c_start; 553 int next_c_start;
599 const char *str_byte_start; 554 const char* str_byte_start;
600 int pos_start = -1; 555 int pos_start = -1;
601 #ifdef TRE_MBSTATE 556 #ifdef TRE_MBSTATE
602 mbstate_t mbstate_start; 557 mbstate_t mbstate_start;
603 #endif /* TRE_MBSTATE */ 558 #endif /* TRE_MBSTATE */
604 559
605 /* End offset of best match so far, or -1 if no match found yet. */ 560 /* End offset of best match so far, or -1 if no match found yet. */
606 int match_eo = -1; 561 int match_eo = -1;
607 /* Tag arrays. */ 562 /* Tag arrays. */
608 int *next_tags, *tags = NULL; 563 int *next_tags, *tags = NULL;
609 /* Current TNFA state. */ 564 /* Current TNFA state. */
610 tre_tnfa_transition_t *state; 565 tre_tnfa_transition_t* state;
611 int *states_seen = NULL; 566 int* states_seen = NULL;
612 567
613 /* Memory allocator to for allocating the backtracking stack. */ 568 /* Memory allocator to for allocating the backtracking stack. */
614 tre_mem_t mem = tre_bt_mem_new(); 569 tre_mem_t mem = tre_bt_mem_new();
615 570
616 /* The backtracking stack. */ 571 /* The backtracking stack. */
617 tre_backtrack_t stack; 572 tre_backtrack_t stack;
618 573
619 tre_tnfa_transition_t *trans_i; 574 tre_tnfa_transition_t* trans_i;
620 regmatch_t *pmatch = NULL; 575 regmatch_t* pmatch = NULL;
621 int ret; 576 int ret;
622 577
623 #ifdef TRE_MBSTATE 578 #ifdef TRE_MBSTATE
624 memset(&mbstate, '\0', sizeof(mbstate)); 579 memset(&mbstate, '\0', sizeof(mbstate));
625 #endif /* TRE_MBSTATE */ 580 #endif /* TRE_MBSTATE */
626 581
627 if (!mem) 582 if (!mem)
628 return REG_ESPACE; 583 return REG_ESPACE;
629 stack = tre_bt_mem_alloc(mem, sizeof(*stack)); 584 stack = tre_bt_mem_alloc(mem, sizeof(*stack));
630 if (!stack) 585 if (!stack) {
631 { 586 ret = REG_ESPACE;
587 goto error_exit;
588 }
589 stack->prev = NULL;
590 stack->next = NULL;
591
592 if (tnfa->num_tags) {
593 tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
594 if (!tags) {
632 ret = REG_ESPACE; 595 ret = REG_ESPACE;
633 goto error_exit; 596 goto error_exit;
634 } 597 }
635 stack->prev = NULL; 598 }
636 stack->next = NULL; 599 if (tnfa->num_submatches) {
600 pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
601 if (!pmatch) {
602 ret = REG_ESPACE;
603 goto error_exit;
604 }
605 }
606 if (tnfa->num_states) {
607 states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
608 if (!states_seen) {
609 ret = REG_ESPACE;
610 goto error_exit;
611 }
612 }
637 613
638 if (tnfa->num_tags) 614 retry : {
639 { 615 int i;
640 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 616 for (i = 0; i < tnfa->num_tags; i++) {
641 if (!tags) 617 tags[i] = -1;
642 » { 618 if (match_tags)
643 » ret = REG_ESPACE; 619 match_tags[i] = -1;
644 » goto error_exit;
645 » }
646 }
647 if (tnfa->num_submatches)
648 {
649 pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
650 if (!pmatch)
651 » {
652 » ret = REG_ESPACE;
653 » goto error_exit;
654 » }
655 }
656 if (tnfa->num_states)
657 {
658 states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
659 if (!states_seen)
660 » {
661 » ret = REG_ESPACE;
662 » goto error_exit;
663 » }
664 }
665
666 retry:
667 {
668 int i;
669 for (i = 0; i < tnfa->num_tags; i++)
670 {
671 » tags[i] = -1;
672 » if (match_tags)
673 » match_tags[i] = -1;
674 }
675 for (i = 0; i < tnfa->num_states; i++)
676 states_seen[i] = 0;
677 } 620 }
621 for (i = 0; i < tnfa->num_states; i++)
622 states_seen[i] = 0;
623 }
678 624
679 state = NULL; 625 state = NULL;
680 pos = pos_start; 626 pos = pos_start;
681 GET_NEXT_WCHAR(); 627 GET_NEXT_WCHAR();
682 pos_start = pos; 628 pos_start = pos;
683 next_c_start = next_c; 629 next_c_start = next_c;
684 str_byte_start = str_byte; 630 str_byte_start = str_byte;
685 #ifdef TRE_MBSTATE 631 #ifdef TRE_MBSTATE
686 mbstate_start = mbstate; 632 mbstate_start = mbstate;
687 #endif /* TRE_MBSTATE */ 633 #endif /* TRE_MBSTATE */
688 634
689 /* Handle initial states. */ 635 /* Handle initial states. */
690 next_tags = NULL; 636 next_tags = NULL;
691 for (trans_i = tnfa->initial; trans_i->state; trans_i++) 637 for (trans_i = tnfa->initial; trans_i->state; trans_i++) {
692 { 638 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) {
693 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) 639 continue;
694 » {
695 » continue;
696 » }
697 if (state == NULL)
698 » {
699 » /* Start from this state. */
700 » state = trans_i->state;
701 » next_tags = trans_i->tags;
702 » }
703 else
704 » {
705 » /* Backtrack to this state. */
706 » BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
707 » » » trans_i->state_id, next_c, tags, mbstate);
708 » {
709 » int *tmp = trans_i->tags;
710 » if (tmp)
711 » while (*tmp >= 0)
712 » » stack->item.tags[*tmp++] = pos;
713 » }
714 » }
715 } 640 }
641 if (state == NULL) {
642 /* Start from this state. */
643 state = trans_i->state;
644 next_tags = trans_i->tags;
645 } else {
646 /* Backtrack to this state. */
647 BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id, next_c,
648 tags, mbstate);
649 {
650 int* tmp = trans_i->tags;
651 if (tmp)
652 while (*tmp >= 0)
653 stack->item.tags[*tmp++] = pos;
654 }
655 }
656 }
716 657
717 if (next_tags) 658 if (next_tags)
718 for (; *next_tags >= 0; next_tags++) 659 for (; *next_tags >= 0; next_tags++)
719 tags[*next_tags] = pos; 660 tags[*next_tags] = pos;
720 661
721
722 if (state == NULL) 662 if (state == NULL)
723 goto backtrack; 663 goto backtrack;
724 664
725 while (1) 665 while (1) {
726 { 666 tre_tnfa_transition_t* next_state;
727 tre_tnfa_transition_t *next_state; 667 int empty_br_match;
728 int empty_br_match;
729 668
730 if (state == tnfa->final) 669 if (state == tnfa->final) {
731 » { 670 if (match_eo < pos || (match_eo == pos && match_tags &&
732 » if (match_eo < pos 671 tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
733 » || (match_eo == pos 672 tags, match_tags))) {
734 » » && match_tags 673 int i;
735 » » && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, 674 /* This match wins the previous match. */
736 » » » » tags, match_tags))) 675 match_eo = pos;
737 » { 676 if (match_tags)
738 » int i; 677 for (i = 0; i < tnfa->num_tags; i++)
739 » /* This match wins the previous match. */ 678 match_tags[i] = tags[i];
740 » match_eo = pos; 679 }
741 » if (match_tags) 680 /* Our TNFAs never have transitions leaving from the final state,
742 » » for (i = 0; i < tnfa->num_tags; i++) 681 so we jump right to backtracking. */
743 » » match_tags[i] = tags[i]; 682 goto backtrack;
744 » } 683 }
745 » /* Our TNFAs never have transitions leaving from the final state,
746 » so we jump right to backtracking. */
747 » goto backtrack;
748 » }
749 684
750 /* Go to the next character in the input string. */ 685 /* Go to the next character in the input string. */
751 empty_br_match = 0; 686 empty_br_match = 0;
752 trans_i = state; 687 trans_i = state;
753 if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) 688 if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) {
754 » { 689 /* This is a back reference state. All transitions leaving from
755 » /* This is a back reference state. All transitions leaving from 690 this state have the same back reference "assertion". Instead
756 » this state have the same back reference "assertion". Instead 691 of reading the next character, we match the back reference. */
757 » of reading the next character, we match the back reference. */ 692 int so, eo, bt = trans_i->u.backref;
758 » int so, eo, bt = trans_i->u.backref; 693 int bt_len;
759 » int bt_len; 694 int result;
760 » int result;
761 695
762 » /* Get the substring we need to match against. Remember to 696 /* Get the substring we need to match against. Remember to
763 » turn off REG_NOSUB temporarily. */ 697 turn off REG_NOSUB temporarily. */
764 » tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, 698 tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, tnfa, tags,
765 » » » tnfa, tags, pos); 699 pos);
766 » so = pmatch[bt].rm_so; 700 so = pmatch[bt].rm_so;
767 » eo = pmatch[bt].rm_eo; 701 eo = pmatch[bt].rm_eo;
768 » bt_len = eo - so; 702 bt_len = eo - so;
769 703
770 » result = strncmp((const char*)string + so, str_byte - 1, 704 result = strncmp((const char*)string + so, str_byte - 1, (size_t)bt_len);
771 » » » » (size_t)bt_len);
772 705
773 » if (result == 0) 706 if (result == 0) {
774 » { 707 /* Back reference matched. Check for infinite loop. */
775 » /* Back reference matched. Check for infinite loop. */ 708 if (bt_len == 0)
776 » if (bt_len == 0) 709 empty_br_match = 1;
777 » » empty_br_match = 1; 710 if (empty_br_match && states_seen[trans_i->state_id]) {
778 » if (empty_br_match && states_seen[trans_i->state_id]) 711 goto backtrack;
779 » » { 712 }
780 » » goto backtrack;
781 » » }
782 713
783 » states_seen[trans_i->state_id] = empty_br_match; 714 states_seen[trans_i->state_id] = empty_br_match;
784 715
785 » /* Advance in input string and resync `prev_c', `next_c' 716 /* Advance in input string and resync `prev_c', `next_c'
786 » » and pos. */ 717 and pos. */
787 » str_byte += bt_len - 1; 718 str_byte += bt_len - 1;
788 » pos += bt_len - 1; 719 pos += bt_len - 1;
789 » GET_NEXT_WCHAR(); 720 GET_NEXT_WCHAR();
790 » } 721 } else {
791 » else 722 goto backtrack;
792 » { 723 }
793 » goto backtrack; 724 } else {
794 » } 725 /* Check for end of string. */
795 » } 726 if (next_c == L'\0')
796 else 727 goto backtrack;
797 » {
798 » /* Check for end of string. */
799 » if (next_c == L'\0')
800 » » goto backtrack;
801 728
802 » /* Read the next character. */ 729 /* Read the next character. */
803 » GET_NEXT_WCHAR(); 730 GET_NEXT_WCHAR();
804 » } 731 }
805 732
806 next_state = NULL; 733 next_state = NULL;
807 for (trans_i = state; trans_i->state; trans_i++) 734 for (trans_i = state; trans_i->state; trans_i++) {
808 » { 735 if (trans_i->code_min <= (tre_cint_t)prev_c &&
809 » if (trans_i->code_min <= (tre_cint_t)prev_c 736 trans_i->code_max >= (tre_cint_t)prev_c) {
810 » && trans_i->code_max >= (tre_cint_t)prev_c) 737 if (trans_i->assertions &&
811 » { 738 (CHECK_ASSERTIONS(trans_i->assertions) ||
812 » if (trans_i->assertions 739 CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) {
813 » » && (CHECK_ASSERTIONS(trans_i->assertions) 740 continue;
814 » » || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) 741 }
815 » » {
816 » » continue;
817 » » }
818 742
819 » if (next_state == NULL) 743 if (next_state == NULL) {
820 » » { 744 /* First matching transition. */
821 » » /* First matching transition. */ 745 next_state = trans_i->state;
822 » » next_state = trans_i->state; 746 next_tags = trans_i->tags;
823 » » next_tags = trans_i->tags; 747 } else {
824 » » } 748 /* Second matching transition. We may need to backtrack here
825 » else 749 to take this transition instead of the first one, so we
826 » » { 750 push this transition in the backtracking stack so we can
827 » » /* Second matching transition. We may need to backtrack here 751 jump back here if needed. */
828 » » to take this transition instead of the first one, so we 752 BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, trans_i->state_id,
829 » » push this transition in the backtracking stack so we can 753 next_c, tags, mbstate);
830 » » jump back here if needed. */ 754 {
831 » » BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, 755 int* tmp;
832 » » » » trans_i->state_id, next_c, tags, mbstate); 756 for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
833 » » { 757 stack->item.tags[*tmp] = pos;
834 » » int *tmp; 758 }
835 » » for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) 759 #if 0 /* XXX - it's important not to look at all transitions here to keep \
836 » » stack->item.tags[*tmp] = pos; 760 the stack small! */
837 » » }
838 #if 0 /* XXX - it's important not to look at all transitions here to keep
839 » the stack small! */
840 break; 761 break;
841 #endif 762 #endif
842 » » } 763 }
843 » } 764 }
844 » } 765 }
845 766
846 if (next_state != NULL) 767 if (next_state != NULL) {
847 » { 768 /* Matching transitions were found. Take the first one. */
848 » /* Matching transitions were found. Take the first one. */ 769 state = next_state;
849 » state = next_state;
850 770
851 » /* Update the tag values. */ 771 /* Update the tag values. */
852 » if (next_tags) 772 if (next_tags)
853 » while (*next_tags >= 0) 773 while (*next_tags >= 0)
854 » tags[*next_tags++] = pos; 774 tags[*next_tags++] = pos;
855 » } 775 } else {
856 else 776 backtrack:
857 » { 777 /* A matching transition was not found. Try to backtrack. */
858 » backtrack: 778 if (stack->prev) {
859 » /* A matching transition was not found. Try to backtrack. */ 779 if (stack->item.state->assertions & ASSERT_BACKREF) {
860 » if (stack->prev) 780 states_seen[stack->item.state_id] = 0;
861 » { 781 }
862 » if (stack->item.state->assertions & ASSERT_BACKREF)
863 » » {
864 » » states_seen[stack->item.state_id] = 0;
865 » » }
866 782
867 » BT_STACK_POP(); 783 BT_STACK_POP();
868 » } 784 } else if (match_eo < 0) {
869 » else if (match_eo < 0) 785 /* Try starting from a later position in the input string. */
870 » { 786 /* Check for end of string. */
871 » /* Try starting from a later position in the input string. */ 787 if (next_c == L'\0') {
872 » /* Check for end of string. */ 788 break;
873 » if (next_c == L'\0') 789 }
874 » » { 790 next_c = next_c_start;
875 » » break;
876 » » }
877 » next_c = next_c_start;
878 #ifdef TRE_MBSTATE 791 #ifdef TRE_MBSTATE
879 » mbstate = mbstate_start; 792 mbstate = mbstate_start;
880 #endif /* TRE_MBSTATE */ 793 #endif /* TRE_MBSTATE */
881 » str_byte = str_byte_start; 794 str_byte = str_byte_start;
882 » goto retry; 795 goto retry;
883 » } 796 } else {
884 » else 797 break;
885 » { 798 }
886 » break;
887 » }
888 » }
889 } 799 }
800 }
890 801
891 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; 802 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
892 *match_end_ofs = match_eo; 803 *match_end_ofs = match_eo;
893 804
894 error_exit: 805 error_exit:
895 tre_bt_mem_destroy(mem); 806 tre_bt_mem_destroy(mem);
896 #ifndef TRE_USE_ALLOCA 807 #ifndef TRE_USE_ALLOCA
897 if (tags) 808 if (tags)
898 xfree(tags); 809 xfree(tags);
899 if (pmatch) 810 if (pmatch)
900 xfree(pmatch); 811 xfree(pmatch);
901 if (states_seen) 812 if (states_seen)
902 xfree(states_seen); 813 xfree(states_seen);
903 #endif /* !TRE_USE_ALLOCA */ 814 #endif /* !TRE_USE_ALLOCA */
904 815
905 return ret; 816 return ret;
906 } 817 }
907 818
908 /*********************************************************************** 819 /***********************************************************************
909 from regexec.c 820 from regexec.c
910 ***********************************************************************/ 821 ***********************************************************************/
911 822
912 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match 823 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
913 endpoint values. */ 824 endpoint values. */
914 static void 825 static void tre_fill_pmatch(size_t nmatch,
915 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 826 regmatch_t pmatch[],
916 » » const tre_tnfa_t *tnfa, int *tags, int match_eo) 827 int cflags,
917 { 828 const tre_tnfa_t* tnfa,
918 tre_submatch_data_t *submatch_data; 829 int* tags,
830 int match_eo) {
831 tre_submatch_data_t* submatch_data;
919 unsigned int i, j; 832 unsigned int i, j;
920 int *parents; 833 int* parents;
921 834
922 i = 0; 835 i = 0;
923 if (match_eo >= 0 && !(cflags & REG_NOSUB)) 836 if (match_eo >= 0 && !(cflags & REG_NOSUB)) {
924 { 837 /* Construct submatch offsets from the tags. */
925 /* Construct submatch offsets from the tags. */ 838 submatch_data = tnfa->submatch_data;
926 submatch_data = tnfa->submatch_data; 839 while (i < tnfa->num_submatches && i < nmatch) {
927 while (i < tnfa->num_submatches && i < nmatch) 840 if (submatch_data[i].so_tag == tnfa->end_tag)
928 » { 841 pmatch[i].rm_so = match_eo;
929 » if (submatch_data[i].so_tag == tnfa->end_tag) 842 else
930 » pmatch[i].rm_so = match_eo; 843 pmatch[i].rm_so = tags[submatch_data[i].so_tag];
931 » else
932 » pmatch[i].rm_so = tags[submatch_data[i].so_tag];
933 844
934 » if (submatch_data[i].eo_tag == tnfa->end_tag) 845 if (submatch_data[i].eo_tag == tnfa->end_tag)
935 » pmatch[i].rm_eo = match_eo; 846 pmatch[i].rm_eo = match_eo;
936 » else 847 else
937 » pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; 848 pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
938 849
939 » /* If either of the endpoints were not used, this submatch 850 /* If either of the endpoints were not used, this submatch
940 » was not part of the match. */ 851 was not part of the match. */
941 » if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) 852 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
942 » pmatch[i].rm_so = pmatch[i].rm_eo = -1; 853 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
943 854
944 i++;
945 }
946 /* Reset all submatches that are not within all of their parent
947 submatches. */
948 i = 0;
949 while (i < tnfa->num_submatches && i < nmatch)
950 {
951 if (pmatch[i].rm_eo == -1)
952 assert(pmatch[i].rm_so == -1);
953 assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
954
955 parents = submatch_data[i].parents;
956 if (parents != NULL)
957 for (j = 0; parents[j] >= 0; j++)
958 {
959 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
960 || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
961 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
962 }
963 i++;
964 }
965 }
966
967 while (i < nmatch)
968 {
969 pmatch[i].rm_so = -1;
970 pmatch[i].rm_eo = -1;
971 i++; 855 i++;
972 } 856 }
857 /* Reset all submatches that are not within all of their parent
858 submatches. */
859 i = 0;
860 while (i < tnfa->num_submatches && i < nmatch) {
861 if (pmatch[i].rm_eo == -1)
862 assert(pmatch[i].rm_so == -1);
863 assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
864
865 parents = submatch_data[i].parents;
866 if (parents != NULL)
867 for (j = 0; parents[j] >= 0; j++) {
868 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so ||
869 pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
870 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
871 }
872 i++;
873 }
874 }
875
876 while (i < nmatch) {
877 pmatch[i].rm_so = -1;
878 pmatch[i].rm_eo = -1;
879 i++;
880 }
973 } 881 }
974 882
975
976 /* 883 /*
977 Wrapper functions for POSIX compatible regexp matching. 884 Wrapper functions for POSIX compatible regexp matching.
978 */ 885 */
979 886
980 int 887 int regexec(const regex_t* restrict preg,
981 regexec(const regex_t *restrict preg, const char *restrict string, 888 const char* restrict string,
982 » size_t nmatch, regmatch_t pmatch[restrict], int eflags) 889 size_t nmatch,
983 { 890 regmatch_t pmatch[restrict],
984 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 891 int eflags) {
892 tre_tnfa_t* tnfa = (void*)preg->TRE_REGEX_T_FIELD;
985 reg_errcode_t status; 893 reg_errcode_t status;
986 int *tags = NULL, eo; 894 int *tags = NULL, eo;
987 if (tnfa->cflags & REG_NOSUB) nmatch = 0; 895 if (tnfa->cflags & REG_NOSUB)
988 if (tnfa->num_tags > 0 && nmatch > 0) 896 nmatch = 0;
989 { 897 if (tnfa->num_tags > 0 && nmatch > 0) {
990 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 898 tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
991 if (tags == NULL) 899 if (tags == NULL)
992 » return REG_ESPACE; 900 return REG_ESPACE;
993 } 901 }
994 902
995 /* Dispatch to the appropriate matcher. */ 903 /* Dispatch to the appropriate matcher. */
996 if (tnfa->have_backrefs) 904 if (tnfa->have_backrefs) {
997 { 905 /* The regex has back references, use the backtracking matcher. */
998 /* The regex has back references, use the backtracking matcher. */ 906 status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
999 status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); 907 } else {
1000 } 908 /* Exact matching, no back references, use the parallel matcher. */
1001 else 909 status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
1002 { 910 }
1003 /* Exact matching, no back references, use the parallel matcher. */
1004 status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
1005 }
1006 911
1007 if (status == REG_OK) 912 if (status == REG_OK)
1008 /* A match was found, so fill the submatch registers. */ 913 /* A match was found, so fill the submatch registers. */
1009 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); 914 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
1010 if (tags) 915 if (tags)
1011 xfree(tags); 916 xfree(tags);
1012 return status; 917 return status;
1013 } 918 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698