OLD | NEW |
1 /* | 1 /* |
2 regcomp.c - TRE POSIX compatible regex compilation functions. | 2 regcomp.c - TRE POSIX compatible regex compilation functions. |
3 | 3 |
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> | 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> |
5 All rights reserved. | 5 All rights reserved. |
6 | 6 |
7 Redistribution and use in source and binary forms, with or without | 7 Redistribution and use in source and binary forms, with or without |
8 modification, are permitted provided that the following conditions | 8 modification, are permitted provided that the following conditions |
9 are met: | 9 are met: |
10 | 10 |
(...skipping 30 matching lines...) Expand all Loading... |
41 #include <assert.h> | 41 #include <assert.h> |
42 | 42 |
43 /*********************************************************************** | 43 /*********************************************************************** |
44 from tre-compile.h | 44 from tre-compile.h |
45 ***********************************************************************/ | 45 ***********************************************************************/ |
46 | 46 |
47 typedef struct { | 47 typedef struct { |
48 int position; | 48 int position; |
49 int code_min; | 49 int code_min; |
50 int code_max; | 50 int code_max; |
51 int *tags; | 51 int* tags; |
52 int assertions; | 52 int assertions; |
53 tre_ctype_t class; | 53 tre_ctype_t class; |
54 tre_ctype_t *neg_classes; | 54 tre_ctype_t* neg_classes; |
55 int backref; | 55 int backref; |
56 } tre_pos_and_tags_t; | 56 } tre_pos_and_tags_t; |
57 | 57 |
58 | |
59 /*********************************************************************** | 58 /*********************************************************************** |
60 from tre-ast.c and tre-ast.h | 59 from tre-ast.c and tre-ast.h |
61 ***********************************************************************/ | 60 ***********************************************************************/ |
62 | 61 |
63 /* The different AST node types. */ | 62 /* The different AST node types. */ |
64 typedef enum { | 63 typedef enum { LITERAL, CATENATION, ITERATION, UNION } tre_ast_type_t; |
65 LITERAL, | |
66 CATENATION, | |
67 ITERATION, | |
68 UNION | |
69 } tre_ast_type_t; | |
70 | 64 |
71 /* Special subtypes of TRE_LITERAL. */ | 65 /* Special subtypes of TRE_LITERAL. */ |
72 #define EMPTY» -1 /* Empty leaf (denotes empty string). */ | 66 #define EMPTY -1 /* Empty leaf (denotes empty string). */ |
73 #define ASSERTION -2 /* Assertion leaf. */ | 67 #define ASSERTION -2 /* Assertion leaf. */ |
74 #define TAG» -3 /* Tag leaf. */ | 68 #define TAG -3 /* Tag leaf. */ |
75 #define BACKREF» -4 /* Back reference leaf. */ | 69 #define BACKREF -4 /* Back reference leaf. */ |
76 | 70 |
77 #define IS_SPECIAL(x)» ((x)->code_min < 0) | 71 #define IS_SPECIAL(x) ((x)->code_min < 0) |
78 #define IS_EMPTY(x)» ((x)->code_min == EMPTY) | 72 #define IS_EMPTY(x) ((x)->code_min == EMPTY) |
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) | 73 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) |
80 #define IS_TAG(x)» ((x)->code_min == TAG) | 74 #define IS_TAG(x) ((x)->code_min == TAG) |
81 #define IS_BACKREF(x)» ((x)->code_min == BACKREF) | 75 #define IS_BACKREF(x) ((x)->code_min == BACKREF) |
82 | |
83 | 76 |
84 /* A generic AST node. All AST nodes consist of this node on the top | 77 /* A generic AST node. All AST nodes consist of this node on the top |
85 level with `obj' pointing to the actual content. */ | 78 level with `obj' pointing to the actual content. */ |
86 typedef struct { | 79 typedef struct { |
87 tre_ast_type_t type; /* Type of the node. */ | 80 tre_ast_type_t type; /* Type of the node. */ |
88 void *obj; /* Pointer to actual node. */ | 81 void* obj; /* Pointer to actual node. */ |
89 int nullable; | 82 int nullable; |
90 int submatch_id; | 83 int submatch_id; |
91 int num_submatches; | 84 int num_submatches; |
92 int num_tags; | 85 int num_tags; |
93 tre_pos_and_tags_t *firstpos; | 86 tre_pos_and_tags_t* firstpos; |
94 tre_pos_and_tags_t *lastpos; | 87 tre_pos_and_tags_t* lastpos; |
95 } tre_ast_node_t; | 88 } tre_ast_node_t; |
96 | 89 |
97 | |
98 /* A "literal" node. These are created for assertions, back references, | 90 /* A "literal" node. These are created for assertions, back references, |
99 tags, matching parameter settings, and all expressions that match one | 91 tags, matching parameter settings, and all expressions that match one |
100 character. */ | 92 character. */ |
101 typedef struct { | 93 typedef struct { |
102 long code_min; | 94 long code_min; |
103 long code_max; | 95 long code_max; |
104 int position; | 96 int position; |
105 tre_ctype_t class; | 97 tre_ctype_t class; |
106 tre_ctype_t *neg_classes; | 98 tre_ctype_t* neg_classes; |
107 } tre_literal_t; | 99 } tre_literal_t; |
108 | 100 |
109 /* A "catenation" node. These are created when two regexps are concatenated. | 101 /* A "catenation" node. These are created when two regexps are concatenated. |
110 If there are more than one subexpressions in sequence, the `left' part | 102 If there are more than one subexpressions in sequence, the `left' part |
111 holds all but the last, and `right' part holds the last subexpression | 103 holds all but the last, and `right' part holds the last subexpression |
112 (catenation is left associative). */ | 104 (catenation is left associative). */ |
113 typedef struct { | 105 typedef struct { |
114 tre_ast_node_t *left; | 106 tre_ast_node_t* left; |
115 tre_ast_node_t *right; | 107 tre_ast_node_t* right; |
116 } tre_catenation_t; | 108 } tre_catenation_t; |
117 | 109 |
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" | 110 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" |
119 operators. */ | 111 operators. */ |
120 typedef struct { | 112 typedef struct { |
121 /* Subexpression to match. */ | 113 /* Subexpression to match. */ |
122 tre_ast_node_t *arg; | 114 tre_ast_node_t* arg; |
123 /* Minimum number of consecutive matches. */ | 115 /* Minimum number of consecutive matches. */ |
124 int min; | 116 int min; |
125 /* Maximum number of consecutive matches. */ | 117 /* Maximum number of consecutive matches. */ |
126 int max; | 118 int max; |
127 /* If 0, match as many characters as possible, if 1 match as few as | 119 /* If 0, match as many characters as possible, if 1 match as few as |
128 possible. Note that this does not always mean the same thing as | 120 possible. Note that this does not always mean the same thing as |
129 matching as many/few repetitions as possible. */ | 121 matching as many/few repetitions as possible. */ |
130 unsigned int minimal:1; | 122 unsigned int minimal : 1; |
131 } tre_iteration_t; | 123 } tre_iteration_t; |
132 | 124 |
133 /* An "union" node. These are created for the "|" operator. */ | 125 /* An "union" node. These are created for the "|" operator. */ |
134 typedef struct { | 126 typedef struct { |
135 tre_ast_node_t *left; | 127 tre_ast_node_t* left; |
136 tre_ast_node_t *right; | 128 tre_ast_node_t* right; |
137 } tre_union_t; | 129 } tre_union_t; |
138 | 130 |
139 | 131 static tre_ast_node_t* tre_ast_new_node(tre_mem_t mem, int type, void* obj) { |
140 static tre_ast_node_t * | 132 tre_ast_node_t* node = tre_mem_calloc(mem, sizeof *node); |
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj) | 133 if (!node || !obj) |
142 { | 134 return 0; |
143 » tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node); | 135 node->obj = obj; |
144 » if (!node || !obj) | 136 node->type = type; |
145 » » return 0; | 137 node->nullable = -1; |
146 » node->obj = obj; | 138 node->submatch_id = -1; |
147 » node->type = type; | 139 return node; |
148 » node->nullable = -1; | |
149 » node->submatch_id = -1; | |
150 » return node; | |
151 } | 140 } |
152 | 141 |
153 static tre_ast_node_t * | 142 static tre_ast_node_t* tre_ast_new_literal(tre_mem_t mem, |
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position) | 143 int code_min, |
155 { | 144 int code_max, |
156 » tre_ast_node_t *node; | 145 int position) { |
157 » tre_literal_t *lit; | 146 tre_ast_node_t* node; |
| 147 tre_literal_t* lit; |
158 | 148 |
159 » lit = tre_mem_calloc(mem, sizeof *lit); | 149 lit = tre_mem_calloc(mem, sizeof *lit); |
160 » node = tre_ast_new_node(mem, LITERAL, lit); | 150 node = tre_ast_new_node(mem, LITERAL, lit); |
161 » if (!node) | 151 if (!node) |
162 » » return 0; | 152 return 0; |
163 » lit->code_min = code_min; | 153 lit->code_min = code_min; |
164 » lit->code_max = code_max; | 154 lit->code_max = code_max; |
165 » lit->position = position; | 155 lit->position = position; |
166 » return node; | 156 return node; |
167 } | 157 } |
168 | 158 |
169 static tre_ast_node_t * | 159 static tre_ast_node_t* tre_ast_new_iter(tre_mem_t mem, |
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minim
al) | 160 tre_ast_node_t* arg, |
171 { | 161 int min, |
172 » tre_ast_node_t *node; | 162 int max, |
173 » tre_iteration_t *iter; | 163 int minimal) { |
| 164 tre_ast_node_t* node; |
| 165 tre_iteration_t* iter; |
174 | 166 |
175 » iter = tre_mem_calloc(mem, sizeof *iter); | 167 iter = tre_mem_calloc(mem, sizeof *iter); |
176 » node = tre_ast_new_node(mem, ITERATION, iter); | 168 node = tre_ast_new_node(mem, ITERATION, iter); |
177 » if (!node) | 169 if (!node) |
178 » » return 0; | 170 return 0; |
179 » iter->arg = arg; | 171 iter->arg = arg; |
180 » iter->min = min; | 172 iter->min = min; |
181 » iter->max = max; | 173 iter->max = max; |
182 » iter->minimal = minimal; | 174 iter->minimal = minimal; |
183 » node->num_submatches = arg->num_submatches; | 175 node->num_submatches = arg->num_submatches; |
184 » return node; | 176 return node; |
185 } | 177 } |
186 | 178 |
187 static tre_ast_node_t * | 179 static tre_ast_node_t* tre_ast_new_union(tre_mem_t mem, |
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right) | 180 tre_ast_node_t* left, |
189 { | 181 tre_ast_node_t* right) { |
190 » tre_ast_node_t *node; | 182 tre_ast_node_t* node; |
191 » tre_union_t *un; | 183 tre_union_t* un; |
192 | 184 |
193 » if (!left) | 185 if (!left) |
194 » » return right; | 186 return right; |
195 » un = tre_mem_calloc(mem, sizeof *un); | 187 un = tre_mem_calloc(mem, sizeof *un); |
196 » node = tre_ast_new_node(mem, UNION, un); | 188 node = tre_ast_new_node(mem, UNION, un); |
197 » if (!node || !right) | 189 if (!node || !right) |
198 » » return 0; | 190 return 0; |
199 » un->left = left; | 191 un->left = left; |
200 » un->right = right; | 192 un->right = right; |
201 » node->num_submatches = left->num_submatches + right->num_submatches; | 193 node->num_submatches = left->num_submatches + right->num_submatches; |
202 » return node; | 194 return node; |
203 } | 195 } |
204 | 196 |
205 static tre_ast_node_t * | 197 static tre_ast_node_t* tre_ast_new_catenation(tre_mem_t mem, |
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *righ
t) | 198 tre_ast_node_t* left, |
207 { | 199 tre_ast_node_t* right) { |
208 » tre_ast_node_t *node; | 200 tre_ast_node_t* node; |
209 » tre_catenation_t *cat; | 201 tre_catenation_t* cat; |
210 | 202 |
211 » if (!left) | 203 if (!left) |
212 » » return right; | 204 return right; |
213 » cat = tre_mem_calloc(mem, sizeof *cat); | 205 cat = tre_mem_calloc(mem, sizeof *cat); |
214 » node = tre_ast_new_node(mem, CATENATION, cat); | 206 node = tre_ast_new_node(mem, CATENATION, cat); |
215 » if (!node) | 207 if (!node) |
216 » » return 0; | 208 return 0; |
217 » cat->left = left; | 209 cat->left = left; |
218 » cat->right = right; | 210 cat->right = right; |
219 » node->num_submatches = left->num_submatches + right->num_submatches; | 211 node->num_submatches = left->num_submatches + right->num_submatches; |
220 » return node; | 212 return node; |
221 } | 213 } |
222 | 214 |
223 | |
224 /*********************************************************************** | 215 /*********************************************************************** |
225 from tre-stack.c and tre-stack.h | 216 from tre-stack.c and tre-stack.h |
226 ***********************************************************************/ | 217 ***********************************************************************/ |
227 | 218 |
228 typedef struct tre_stack_rec tre_stack_t; | 219 typedef struct tre_stack_rec tre_stack_t; |
229 | 220 |
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size' | 221 /* Creates a new stack object. `size' is initial size in bytes, `max_size' |
231 is maximum size, and `increment' specifies how much more space will be | 222 is maximum size, and `increment' specifies how much more space will be |
232 allocated with realloc() if all space gets used up. Returns the stack | 223 allocated with realloc() if all space gets used up. Returns the stack |
233 object or NULL if out of memory. */ | 224 object or NULL if out of memory. */ |
234 static tre_stack_t * | 225 static tre_stack_t* tre_stack_new(int size, int max_size, int increment); |
235 tre_stack_new(int size, int max_size, int increment); | |
236 | 226 |
237 /* Frees the stack object. */ | 227 /* Frees the stack object. */ |
238 static void | 228 static void tre_stack_destroy(tre_stack_t* s); |
239 tre_stack_destroy(tre_stack_t *s); | |
240 | 229 |
241 /* Returns the current number of objects in the stack. */ | 230 /* Returns the current number of objects in the stack. */ |
242 static int | 231 static int tre_stack_num_objects(tre_stack_t* s); |
243 tre_stack_num_objects(tre_stack_t *s); | |
244 | 232 |
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes | 233 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes |
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory. | 234 `value' on top of stack `s'. Returns REG_ESPACE if out of memory. |
247 This tries to realloc() more space before failing if maximum size | 235 This tries to realloc() more space before failing if maximum size |
248 has not yet been reached. Returns REG_OK if successful. */ | 236 has not yet been reached. Returns REG_OK if successful. */ |
249 #define declare_pushf(typetag, type)» » » » » \ | 237 #define declare_pushf(typetag, type) \ |
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value) | 238 static reg_errcode_t tre_stack_push_##typetag(tre_stack_t* s, type value) |
251 | 239 |
252 declare_pushf(voidptr, void *); | 240 declare_pushf(voidptr, void*); |
253 declare_pushf(int, int); | 241 declare_pushf(int, int); |
254 | 242 |
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost | 243 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost |
256 element off of stack `s' and returns it. The stack must not be | 244 element off of stack `s' and returns it. The stack must not be |
257 empty. */ | 245 empty. */ |
258 #define declare_popf(typetag, type)» » \ | 246 #define declare_popf(typetag, type) \ |
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s) | 247 static type tre_stack_pop_##typetag(tre_stack_t* s) |
260 | 248 |
261 declare_popf(voidptr, void *); | 249 declare_popf(voidptr, void*); |
262 declare_popf(int, int); | 250 declare_popf(int, int); |
263 | 251 |
264 /* Just to save some typing. */ | 252 /* Just to save some typing. */ |
265 #define STACK_PUSH(s, typetag, value)» » » » » \ | 253 #define STACK_PUSH(s, typetag, value) \ |
266 do» » » » » » » » » \ | 254 do { \ |
267 {» » » » » » » » » \ | 255 status = tre_stack_push_##typetag(s, value); \ |
268 status = tre_stack_push_ ## typetag(s, value);» » » \ | 256 } while (/*CONSTCOND*/ 0) |
269 }» » » » » » » » » \ | |
270 while (/*CONSTCOND*/0) | |
271 | 257 |
272 #define STACK_PUSHX(s, typetag, value)» » » » » \ | 258 #define STACK_PUSHX(s, typetag, value) \ |
273 {» » » » » » » » » \ | 259 { \ |
274 status = tre_stack_push_ ## typetag(s, value);» » » \ | 260 status = tre_stack_push_##typetag(s, value); \ |
275 if (status != REG_OK)» » » » » » \ | 261 if (status != REG_OK) \ |
276 break;» » » » » » » » \ | 262 break; \ |
277 } | 263 } |
278 | 264 |
279 #define STACK_PUSHR(s, typetag, value)» » » » » \ | 265 #define STACK_PUSHR(s, typetag, value) \ |
280 {» » » » » » » » » \ | 266 { \ |
281 reg_errcode_t _status;» » » » » » \ | 267 reg_errcode_t _status; \ |
282 _status = tre_stack_push_ ## typetag(s, value);» » » \ | 268 _status = tre_stack_push_##typetag(s, value); \ |
283 if (_status != REG_OK)» » » » » » \ | 269 if (_status != REG_OK) \ |
284 return _status;» » » » » » » \ | 270 return _status; \ |
285 } | 271 } |
286 | 272 |
287 union tre_stack_item { | 273 union tre_stack_item { |
288 void *voidptr_value; | 274 void* voidptr_value; |
289 int int_value; | 275 int int_value; |
290 }; | 276 }; |
291 | 277 |
292 struct tre_stack_rec { | 278 struct tre_stack_rec { |
293 int size; | 279 int size; |
294 int max_size; | 280 int max_size; |
295 int increment; | 281 int increment; |
296 int ptr; | 282 int ptr; |
297 union tre_stack_item *stack; | 283 union tre_stack_item* stack; |
298 }; | 284 }; |
299 | 285 |
300 | 286 static tre_stack_t* tre_stack_new(int size, int max_size, int increment) { |
301 static tre_stack_t * | 287 tre_stack_t* s; |
302 tre_stack_new(int size, int max_size, int increment) | |
303 { | |
304 tre_stack_t *s; | |
305 | 288 |
306 s = xmalloc(sizeof(*s)); | 289 s = xmalloc(sizeof(*s)); |
307 if (s != NULL) | 290 if (s != NULL) { |
308 { | 291 s->stack = xmalloc(sizeof(*s->stack) * size); |
309 s->stack = xmalloc(sizeof(*s->stack) * size); | 292 if (s->stack == NULL) { |
310 if (s->stack == NULL) | 293 xfree(s); |
311 » { | 294 return NULL; |
312 » xfree(s); | |
313 » return NULL; | |
314 » } | |
315 s->size = size; | |
316 s->max_size = max_size; | |
317 s->increment = increment; | |
318 s->ptr = 0; | |
319 } | 295 } |
| 296 s->size = size; |
| 297 s->max_size = max_size; |
| 298 s->increment = increment; |
| 299 s->ptr = 0; |
| 300 } |
320 return s; | 301 return s; |
321 } | 302 } |
322 | 303 |
323 static void | 304 static void tre_stack_destroy(tre_stack_t* s) { |
324 tre_stack_destroy(tre_stack_t *s) | |
325 { | |
326 xfree(s->stack); | 305 xfree(s->stack); |
327 xfree(s); | 306 xfree(s); |
328 } | 307 } |
329 | 308 |
330 static int | 309 static int tre_stack_num_objects(tre_stack_t* s) { |
331 tre_stack_num_objects(tre_stack_t *s) | |
332 { | |
333 return s->ptr; | 310 return s->ptr; |
334 } | 311 } |
335 | 312 |
336 static reg_errcode_t | 313 static reg_errcode_t tre_stack_push(tre_stack_t* s, |
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value) | 314 union tre_stack_item value) { |
338 { | 315 if (s->ptr < s->size) { |
339 if (s->ptr < s->size) | 316 s->stack[s->ptr] = value; |
340 { | 317 s->ptr++; |
341 s->stack[s->ptr] = value; | 318 } else { |
342 s->ptr++; | 319 if (s->size >= s->max_size) { |
| 320 return REG_ESPACE; |
| 321 } else { |
| 322 union tre_stack_item* new_buffer; |
| 323 int new_size; |
| 324 new_size = s->size + s->increment; |
| 325 if (new_size > s->max_size) |
| 326 new_size = s->max_size; |
| 327 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size); |
| 328 if (new_buffer == NULL) { |
| 329 return REG_ESPACE; |
| 330 } |
| 331 assert(new_size > s->size); |
| 332 s->size = new_size; |
| 333 s->stack = new_buffer; |
| 334 tre_stack_push(s, value); |
343 } | 335 } |
344 else | 336 } |
345 { | |
346 if (s->size >= s->max_size) | |
347 » { | |
348 » return REG_ESPACE; | |
349 » } | |
350 else | |
351 » { | |
352 » union tre_stack_item *new_buffer; | |
353 » int new_size; | |
354 » new_size = s->size + s->increment; | |
355 » if (new_size > s->max_size) | |
356 » new_size = s->max_size; | |
357 » new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size); | |
358 » if (new_buffer == NULL) | |
359 » { | |
360 » return REG_ESPACE; | |
361 » } | |
362 » assert(new_size > s->size); | |
363 » s->size = new_size; | |
364 » s->stack = new_buffer; | |
365 » tre_stack_push(s, value); | |
366 » } | |
367 } | |
368 return REG_OK; | 337 return REG_OK; |
369 } | 338 } |
370 | 339 |
371 #define define_pushf(typetag, type) \ | 340 #define define_pushf(typetag, type) \ |
372 declare_pushf(typetag, type) { \ | 341 declare_pushf(typetag, type) { \ |
373 union tre_stack_item item;» \ | 342 union tre_stack_item item; \ |
374 item.typetag ## _value = value; \ | 343 item.typetag##_value = value; \ |
375 return tre_stack_push(s, item); \ | 344 return tre_stack_push(s, item); \ |
376 } | |
377 | |
378 define_pushf(int, int) | |
379 define_pushf(voidptr, void *) | |
380 | |
381 #define define_popf(typetag, type)» » \ | |
382 declare_popf(typetag, type) {»» » \ | |
383 return s->stack[--s->ptr].typetag ## _value; \ | |
384 } | 345 } |
385 | 346 |
386 define_popf(int, int) | 347 define_pushf(int, int) define_pushf(voidptr, void*) |
387 define_popf(voidptr, void *) | 348 #define define_popf(typetag, type) \ |
| 349 declare_popf(typetag, type) { return s->stack[--s->ptr].typetag##_value; } |
388 | 350 |
| 351 define_popf(int, int) define_popf(voidptr, void*) |
389 | 352 |
390 /*********************************************************************** | 353 /*********************************************************************** |
391 from tre-parse.c and tre-parse.h | 354 from tre-parse.c and tre-parse.h |
392 ***********************************************************************/ | 355 ***********************************************************************/ |
393 | 356 |
394 /* Parse context. */ | 357 /* Parse context. */ |
395 typedef struct { | 358 typedef struct { |
396 » /* Memory allocator. The AST is allocated using this. */ | 359 /* Memory allocator. The AST is allocated using this. */ |
397 » tre_mem_t mem; | 360 tre_mem_t mem; |
398 » /* Stack used for keeping track of regexp syntax. */ | 361 /* Stack used for keeping track of regexp syntax. */ |
399 » tre_stack_t *stack; | 362 tre_stack_t* stack; |
400 » /* The parsed node after a parse function returns. */ | 363 /* The parsed node after a parse function returns. */ |
401 » tre_ast_node_t *n; | 364 tre_ast_node_t* n; |
402 » /* Position in the regexp pattern after a parse function returns. */ | 365 /* Position in the regexp pattern after a parse function returns. */ |
403 » const char *s; | 366 const char* s; |
404 » /* The first character of the regexp. */ | 367 /* The first character of the regexp. */ |
405 » const char *re; | 368 const char* re; |
406 » /* Current submatch ID. */ | 369 /* Current submatch ID. */ |
407 » int submatch_id; | 370 int submatch_id; |
408 » /* Current position (number of literal). */ | 371 /* Current position (number of literal). */ |
409 » int position; | 372 int position; |
410 » /* The highest back reference or -1 if none seen so far. */ | 373 /* The highest back reference or -1 if none seen so far. */ |
411 » int max_backref; | 374 int max_backref; |
412 » /* Compilation flags. */ | 375 /* Compilation flags. */ |
413 » int cflags; | 376 int cflags; |
414 } tre_parse_ctx_t; | 377 } tre_parse_ctx_t; |
415 | 378 |
416 /* Some macros for expanding \w, \s, etc. */ | 379 /* Some macros for expanding \w, \s, etc. */ |
417 static const struct { | 380 static const struct { |
418 » char c; | 381 char c; |
419 » const char *expansion; | 382 const char* expansion; |
420 } tre_macros[] = { | 383 } tre_macros[] = {{'t', "\t"}, |
421 » {'t', "\t"}, {'n', "\n"}, {'r', "\r"}, | 384 {'n', "\n"}, |
422 » {'f', "\f"}, {'a', "\a"}, {'e', "\033"}, | 385 {'r', "\r"}, |
423 » {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"}, | 386 {'f', "\f"}, |
424 » {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"}, | 387 {'a', "\a"}, |
425 » { 0, 0 } | 388 {'e', "\033"}, |
426 }; | 389 {'w', "[[:alnum:]_]"}, |
| 390 {'W', "[^[:alnum:]_]"}, |
| 391 {'s', "[[:space:]]"}, |
| 392 {'S', "[^[:space:]]"}, |
| 393 {'d', "[[:digit:]]"}, |
| 394 {'D', "[^[:digit:]]"}, |
| 395 {0, 0}}; |
427 | 396 |
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which | 397 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which |
429 must have at least `len' items. Sets buf[0] to zero if the there | 398 must have at least `len' items. Sets buf[0] to zero if the there |
430 is no match in `tre_macros'. */ | 399 is no match in `tre_macros'. */ |
431 static const char *tre_expand_macro(const char *s) | 400 static const char* tre_expand_macro(const char* s) { |
432 { | 401 int i; |
433 » int i; | 402 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++) |
434 » for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++); | 403 ; |
435 » return tre_macros[i].expansion; | 404 return tre_macros[i].expansion; |
436 } | 405 } |
437 | 406 |
438 static int | 407 static int tre_compare_lit(const void* a, const void* b) { |
439 tre_compare_lit(const void *a, const void *b) | 408 const tre_literal_t* const* la = a; |
440 { | 409 const tre_literal_t* const* lb = b; |
441 » const tre_literal_t *const *la = a; | 410 /* assumes the range of valid code_min is < INT_MAX */ |
442 » const tre_literal_t *const *lb = b; | 411 return la[0]->code_min - lb[0]->code_min; |
443 » /* assumes the range of valid code_min is < INT_MAX */ | |
444 » return la[0]->code_min - lb[0]->code_min; | |
445 } | 412 } |
446 | 413 |
447 struct literals { | 414 struct literals { |
448 » tre_mem_t mem; | 415 tre_mem_t mem; |
449 » tre_literal_t **a; | 416 tre_literal_t** a; |
450 » int len; | 417 int len; |
451 » int cap; | 418 int cap; |
452 }; | 419 }; |
453 | 420 |
454 static tre_literal_t *tre_new_lit(struct literals *p) | 421 static tre_literal_t* tre_new_lit(struct literals* p) { |
455 { | 422 tre_literal_t** a; |
456 » tre_literal_t **a; | 423 if (p->len >= p->cap) { |
457 » if (p->len >= p->cap) { | 424 if (p->cap >= 1 << 15) |
458 » » if (p->cap >= 1<<15) | 425 return 0; |
459 » » » return 0; | 426 p->cap *= 2; |
460 » » p->cap *= 2; | 427 a = xrealloc(p->a, p->cap * sizeof *p->a); |
461 » » a = xrealloc(p->a, p->cap * sizeof *p->a); | 428 if (!a) |
462 » » if (!a) | 429 return 0; |
463 » » » return 0; | 430 p->a = a; |
464 » » p->a = a; | 431 } |
465 » } | 432 a = p->a + p->len++; |
466 » a = p->a + p->len++; | 433 *a = tre_mem_calloc(p->mem, sizeof **a); |
467 » *a = tre_mem_calloc(p->mem, sizeof **a); | 434 return *a; |
468 » return *a; | |
469 } | 435 } |
470 | 436 |
471 static int add_icase_literals(struct literals *ls, int min, int max) | 437 static int add_icase_literals(struct literals* ls, int min, int max) { |
472 { | 438 tre_literal_t* lit; |
473 » tre_literal_t *lit; | 439 int b, e, c; |
474 » int b, e, c; | 440 for (c = min; c <= max;) { |
475 » for (c=min; c<=max; ) { | 441 /* assumes islower(c) and isupper(c) are exclusive |
476 » » /* assumes islower(c) and isupper(c) are exclusive | 442 and toupper(c)!=c if islower(c). |
477 » » and toupper(c)!=c if islower(c). | 443 multiple opposite case characters are not supported */ |
478 » » multiple opposite case characters are not supported */ | 444 if (tre_islower(c)) { |
479 » » if (tre_islower(c)) { | 445 b = e = tre_toupper(c); |
480 » » » b = e = tre_toupper(c); | 446 for (c++, e++; c <= max; c++, e++) |
481 » » » for (c++, e++; c<=max; c++, e++) | 447 if (tre_toupper(c) != e) |
482 » » » » if (tre_toupper(c) != e) break; | 448 break; |
483 » » } else if (tre_isupper(c)) { | 449 } else if (tre_isupper(c)) { |
484 » » » b = e = tre_tolower(c); | 450 b = e = tre_tolower(c); |
485 » » » for (c++, e++; c<=max; c++, e++) | 451 for (c++, e++; c <= max; c++, e++) |
486 » » » » if (tre_tolower(c) != e) break; | 452 if (tre_tolower(c) != e) |
487 » » } else { | 453 break; |
488 » » » c++; | 454 } else { |
489 » » » continue; | 455 c++; |
490 » » } | 456 continue; |
491 » » lit = tre_new_lit(ls); | 457 } |
492 » » if (!lit) | 458 lit = tre_new_lit(ls); |
493 » » » return -1; | 459 if (!lit) |
494 » » lit->code_min = b; | 460 return -1; |
495 » » lit->code_max = e-1; | 461 lit->code_min = b; |
496 » » lit->position = -1; | 462 lit->code_max = e - 1; |
497 » } | 463 lit->position = -1; |
498 » return 0; | 464 } |
| 465 return 0; |
499 } | 466 } |
500 | 467 |
501 | |
502 /* Maximum number of character classes in a negated bracket expression. */ | 468 /* Maximum number of character classes in a negated bracket expression. */ |
503 #define MAX_NEG_CLASSES 64 | 469 #define MAX_NEG_CLASSES 64 |
504 | 470 |
505 struct neg { | 471 struct neg { |
506 » int negate; | 472 int negate; |
507 » int len; | 473 int len; |
508 » tre_ctype_t a[MAX_NEG_CLASSES]; | 474 tre_ctype_t a[MAX_NEG_CLASSES]; |
509 }; | 475 }; |
510 | 476 |
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges | 477 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges |
512 | 478 |
513 /* | 479 /* |
514 bracket grammar: | 480 bracket grammar: |
515 Bracket = '[' List ']' | '[^' List ']' | 481 Bracket = '[' List ']' | '[^' List ']' |
516 List = Term | List Term | 482 List = Term | List Term |
517 Term = Char | Range | Chclass | Eqclass | 483 Term = Char | Range | Chclass | Eqclass |
518 Range = Char '-' Char | Char '-' '-' | 484 Range = Char '-' Char | Char '-' '-' |
519 Char = Coll | coll_single | 485 Char = Coll | coll_single |
520 Meta = ']' | '-' | 486 Meta = ']' | '-' |
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]' | 487 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]' |
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]' | 488 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]' |
523 Chclass = '[:' class ':]' | 489 Chclass = '[:' class ':]' |
524 | 490 |
525 coll_single is a single char collating element but it can be | 491 coll_single is a single char collating element but it can be |
526 '-' only at the beginning or end of a List and | 492 '-' only at the beginning or end of a List and |
527 ']' only at the beginning of a List and | 493 ']' only at the beginning of a List and |
528 '^' anywhere except after the openning '[' | 494 '^' anywhere except after the openning '[' |
529 */ | 495 */ |
530 | 496 |
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, st
ruct literals *ls, struct neg *neg) | 497 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t* ctx, |
532 { | 498 const char* s, |
533 » const char *start = s; | 499 struct literals* ls, |
534 » tre_ctype_t class; | 500 struct neg* neg) { |
535 » int min, max; | 501 const char* start = s; |
536 » wchar_t wc; | 502 tre_ctype_t class; |
537 » int len; | 503 int min, max; |
538 | 504 wchar_t wc; |
539 » for (;;) { | 505 int len; |
540 » » class = 0; | 506 |
541 » » len = mbtowc(&wc, s, -1); | 507 for (;;) { |
542 » » if (len <= 0) | 508 class = 0; |
543 » » » return *s ? REG_BADPAT : REG_EBRACK; | 509 len = mbtowc(&wc, s, -1); |
544 » » if (*s == ']' && s != start) { | 510 if (len <= 0) |
545 » » » ctx->s = s+1; | 511 return *s ? REG_BADPAT : REG_EBRACK; |
546 » » » return REG_OK; | 512 if (*s == ']' && s != start) { |
547 » » } | 513 ctx->s = s + 1; |
548 » » if (*s == '-' && s != start && s[1] != ']' && | 514 return REG_OK; |
549 » » /* extension: [a-z--@] is accepted as [a-z]|[--@] */ | 515 } |
550 » » (s[1] != '-' || s[2] == ']')) | 516 if (*s == '-' && s != start && s[1] != ']' && |
551 » » » return REG_ERANGE; | 517 /* extension: [a-z--@] is accepted as [a-z]|[--@] */ |
552 » » if (*s == '[' && (s[1] == '.' || s[1] == '=')) | 518 (s[1] != '-' || s[2] == ']')) |
553 » » » /* collating symbols and equivalence classes are not sup
ported */ | 519 return REG_ERANGE; |
554 » » » return REG_ECOLLATE; | 520 if (*s == '[' && (s[1] == '.' || s[1] == '=')) |
555 » » if (*s == '[' && s[1] == ':') { | 521 /* collating symbols and equivalence classes are not supported */ |
556 » » » char tmp[CHARCLASS_NAME_MAX+1]; | 522 return REG_ECOLLATE; |
557 » » » s += 2; | 523 if (*s == '[' && s[1] == ':') { |
558 » » » for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) { | 524 char tmp[CHARCLASS_NAME_MAX + 1]; |
559 » » » » if (s[len] == ':') { | 525 s += 2; |
560 » » » » » memcpy(tmp, s, len); | 526 for (len = 0; len < CHARCLASS_NAME_MAX && s[len]; len++) { |
561 » » » » » tmp[len] = 0; | 527 if (s[len] == ':') { |
562 » » » » » class = tre_ctype(tmp); | 528 memcpy(tmp, s, len); |
563 » » » » » break; | 529 tmp[len] = 0; |
564 » » » » } | 530 class = tre_ctype(tmp); |
565 » » » } | 531 break; |
566 » » » if (!class || s[len+1] != ']') | 532 } |
567 » » » » return REG_ECTYPE; | 533 } |
568 » » » min = 0; | 534 if (!class || s[len + 1] != ']') |
569 » » » max = TRE_CHAR_MAX; | 535 return REG_ECTYPE; |
570 » » » s += len+2; | 536 min = 0; |
571 » » } else { | 537 max = TRE_CHAR_MAX; |
572 » » » min = max = wc; | 538 s += len + 2; |
573 » » » s += len; | 539 } else { |
574 » » » if (*s == '-' && s[1] != ']') { | 540 min = max = wc; |
575 » » » » s++; | 541 s += len; |
576 » » » » len = mbtowc(&wc, s, -1); | 542 if (*s == '-' && s[1] != ']') { |
577 » » » » max = wc; | 543 s++; |
578 » » » » /* XXX - Should use collation order instead of | 544 len = mbtowc(&wc, s, -1); |
579 » » » » encoding values in character ranges. */ | 545 max = wc; |
580 » » » » if (len <= 0 || min > max) | 546 /* XXX - Should use collation order instead of |
581 » » » » » return REG_ERANGE; | 547 encoding values in character ranges. */ |
582 » » » » s += len; | 548 if (len <= 0 || min > max) |
583 » » » } | 549 return REG_ERANGE; |
584 » » } | 550 s += len; |
585 | 551 } |
586 » » if (class && neg->negate) { | 552 } |
587 » » » if (neg->len >= MAX_NEG_CLASSES) | 553 |
588 » » » » return REG_ESPACE; | 554 if (class && neg->negate) { |
589 » » » neg->a[neg->len++] = class; | 555 if (neg->len >= MAX_NEG_CLASSES) |
590 » » } else { | 556 return REG_ESPACE; |
591 » » » tre_literal_t *lit = tre_new_lit(ls); | 557 neg->a[neg->len++] = class; |
592 » » » if (!lit) | 558 } else { |
593 » » » » return REG_ESPACE; | 559 tre_literal_t* lit = tre_new_lit(ls); |
594 » » » lit->code_min = min; | 560 if (!lit) |
595 » » » lit->code_max = max; | 561 return REG_ESPACE; |
596 » » » lit->class = class; | 562 lit->code_min = min; |
597 » » » lit->position = -1; | 563 lit->code_max = max; |
598 | 564 lit->class = class; |
599 » » » /* Add opposite-case codepoints if REG_ICASE is present. | 565 lit->position = -1; |
600 » » » It seems that POSIX requires that bracket negation | 566 |
601 » » » should happen before case-folding, but most practical | 567 /* Add opposite-case codepoints if REG_ICASE is present. |
602 » » » implementations do it the other way around. Changing | 568 It seems that POSIX requires that bracket negation |
603 » » » the order would need efficient representation of | 569 should happen before case-folding, but most practical |
604 » » » case-fold ranges and bracket range sets even with | 570 implementations do it the other way around. Changing |
605 » » » simple patterns so this is ok for now. */ | 571 the order would need efficient representation of |
606 » » » if (ctx->cflags & REG_ICASE && !class) | 572 case-fold ranges and bracket range sets even with |
607 » » » » if (add_icase_literals(ls, min, max)) | 573 simple patterns so this is ok for now. */ |
608 » » » » » return REG_ESPACE; | 574 if (ctx->cflags & REG_ICASE && !class) |
609 » » } | 575 if (add_icase_literals(ls, min, max)) |
610 » } | 576 return REG_ESPACE; |
611 } | 577 } |
612 | 578 } |
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s) | 579 } |
614 { | 580 |
615 » int i, max, min, negmax, negmin; | 581 static reg_errcode_t parse_bracket(tre_parse_ctx_t* ctx, const char* s) { |
616 » tre_ast_node_t *node = 0, *n; | 582 int i, max, min, negmax, negmin; |
617 » tre_ctype_t *nc = 0; | 583 tre_ast_node_t *node = 0, *n; |
618 » tre_literal_t *lit; | 584 tre_ctype_t* nc = 0; |
619 » struct literals ls; | 585 tre_literal_t* lit; |
620 » struct neg neg; | 586 struct literals ls; |
621 » reg_errcode_t err; | 587 struct neg neg; |
622 | 588 reg_errcode_t err; |
623 » ls.mem = ctx->mem; | 589 |
624 » ls.len = 0; | 590 ls.mem = ctx->mem; |
625 » ls.cap = 32; | 591 ls.len = 0; |
626 » ls.a = xmalloc(ls.cap * sizeof *ls.a); | 592 ls.cap = 32; |
627 » if (!ls.a) | 593 ls.a = xmalloc(ls.cap * sizeof *ls.a); |
628 » » return REG_ESPACE; | 594 if (!ls.a) |
629 » neg.len = 0; | 595 return REG_ESPACE; |
630 » neg.negate = *s == '^'; | 596 neg.len = 0; |
631 » if (neg.negate) | 597 neg.negate = *s == '^'; |
632 » » s++; | 598 if (neg.negate) |
633 | 599 s++; |
634 » err = parse_bracket_terms(ctx, s, &ls, &neg); | 600 |
635 » if (err != REG_OK) | 601 err = parse_bracket_terms(ctx, s, &ls, &neg); |
636 » » goto parse_bracket_done; | 602 if (err != REG_OK) |
637 | 603 goto parse_bracket_done; |
638 » if (neg.negate) { | 604 |
639 » » /* Sort the array if we need to negate it. */ | 605 if (neg.negate) { |
640 » » qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit); | 606 /* Sort the array if we need to negate it. */ |
641 » » /* extra lit for the last negated range */ | 607 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit); |
642 » » lit = tre_new_lit(&ls); | 608 /* extra lit for the last negated range */ |
643 » » if (!lit) { | 609 lit = tre_new_lit(&ls); |
644 » » » err = REG_ESPACE; | 610 if (!lit) { |
645 » » » goto parse_bracket_done; | 611 err = REG_ESPACE; |
646 » » } | 612 goto parse_bracket_done; |
647 » » lit->code_min = TRE_CHAR_MAX+1; | 613 } |
648 » » lit->code_max = TRE_CHAR_MAX+1; | 614 lit->code_min = TRE_CHAR_MAX + 1; |
649 » » lit->position = -1; | 615 lit->code_max = TRE_CHAR_MAX + 1; |
650 » » /* negated classes */ | 616 lit->position = -1; |
651 » » if (neg.len) { | 617 /* negated classes */ |
652 » » » nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a); | 618 if (neg.len) { |
653 » » » if (!nc) { | 619 nc = tre_mem_alloc(ctx->mem, (neg.len + 1) * sizeof *neg.a); |
654 » » » » err = REG_ESPACE; | 620 if (!nc) { |
655 » » » » goto parse_bracket_done; | 621 err = REG_ESPACE; |
656 » » » } | 622 goto parse_bracket_done; |
657 » » » memcpy(nc, neg.a, neg.len*sizeof *neg.a); | 623 } |
658 » » » nc[neg.len] = 0; | 624 memcpy(nc, neg.a, neg.len * sizeof *neg.a); |
659 » » } | 625 nc[neg.len] = 0; |
660 » } | 626 } |
661 | 627 } |
662 » /* Build a union of the items in the array, negated if necessary. */ | 628 |
663 » negmax = negmin = 0; | 629 /* Build a union of the items in the array, negated if necessary. */ |
664 » for (i = 0; i < ls.len; i++) { | 630 negmax = negmin = 0; |
665 » » lit = ls.a[i]; | 631 for (i = 0; i < ls.len; i++) { |
666 » » min = lit->code_min; | 632 lit = ls.a[i]; |
667 » » max = lit->code_max; | 633 min = lit->code_min; |
668 » » if (neg.negate) { | 634 max = lit->code_max; |
669 » » » if (min <= negmin) { | 635 if (neg.negate) { |
670 » » » » /* Overlap. */ | 636 if (min <= negmin) { |
671 » » » » negmin = MAX(max + 1, negmin); | 637 /* Overlap. */ |
672 » » » » continue; | 638 negmin = MAX(max + 1, negmin); |
673 » » » } | 639 continue; |
674 » » » negmax = min - 1; | 640 } |
675 » » » lit->code_min = negmin; | 641 negmax = min - 1; |
676 » » » lit->code_max = negmax; | 642 lit->code_min = negmin; |
677 » » » negmin = max + 1; | 643 lit->code_max = negmax; |
678 » » } | 644 negmin = max + 1; |
679 » » lit->position = ctx->position; | 645 } |
680 » » lit->neg_classes = nc; | 646 lit->position = ctx->position; |
681 » » n = tre_ast_new_node(ctx->mem, LITERAL, lit); | 647 lit->neg_classes = nc; |
682 » » node = tre_ast_new_union(ctx->mem, node, n); | 648 n = tre_ast_new_node(ctx->mem, LITERAL, lit); |
683 » » if (!node) { | 649 node = tre_ast_new_union(ctx->mem, node, n); |
684 » » » err = REG_ESPACE; | 650 if (!node) { |
685 » » » break; | 651 err = REG_ESPACE; |
686 » » } | 652 break; |
687 » } | 653 } |
| 654 } |
688 | 655 |
689 parse_bracket_done: | 656 parse_bracket_done: |
690 » xfree(ls.a); | 657 xfree(ls.a); |
691 » ctx->position++; | 658 ctx->position++; |
692 » ctx->n = node; | 659 ctx->n = node; |
693 » return err; | 660 return err; |
694 } | 661 } |
695 | 662 |
696 static const char *parse_dup_count(const char *s, int *n) | 663 static const char* parse_dup_count(const char* s, int* n) { |
697 { | 664 *n = -1; |
698 » *n = -1; | 665 if (!isdigit(*s)) |
699 » if (!isdigit(*s)) | 666 return s; |
700 » » return s; | 667 *n = 0; |
701 » *n = 0; | 668 for (;;) { |
702 » for (;;) { | 669 *n = 10 * *n + (*s - '0'); |
703 » » *n = 10 * *n + (*s - '0'); | 670 s++; |
704 » » s++; | 671 if (!isdigit(*s) || *n > RE_DUP_MAX) |
705 » » if (!isdigit(*s) || *n > RE_DUP_MAX) | 672 break; |
706 » » » break; | 673 } |
707 » } | 674 return s; |
708 » return s; | 675 } |
709 } | 676 |
710 | 677 static const char* parse_dup(const char* s, int ere, int* pmin, int* pmax) { |
711 static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax) | 678 int min, max; |
712 { | 679 |
713 » int min, max; | 680 s = parse_dup_count(s, &min); |
714 | 681 if (*s == ',') |
715 » s = parse_dup_count(s, &min); | 682 s = parse_dup_count(s + 1, &max); |
716 » if (*s == ',') | 683 else |
717 » » s = parse_dup_count(s+1, &max); | 684 max = min; |
718 » else | 685 |
719 » » max = min; | 686 if ((max < min && max >= 0) || max > RE_DUP_MAX || min > RE_DUP_MAX || |
720 | 687 min < 0 || (!ere && *s++ != '\\') || *s++ != '}') |
721 » if ( | 688 return 0; |
722 » » (max < min && max >= 0) || | 689 *pmin = min; |
723 » » max > RE_DUP_MAX || | 690 *pmax = max; |
724 » » min > RE_DUP_MAX || | 691 return s; |
725 » » min < 0 || | 692 } |
726 » » (!ere && *s++ != '\\') || | 693 |
727 » » *s++ != '}' | 694 static int hexval(unsigned c) { |
728 » ) | 695 if (c - '0' < 10) |
729 » » return 0; | 696 return c - '0'; |
730 » *pmin = min; | 697 c |= 32; |
731 » *pmax = max; | 698 if (c - 'a' < 6) |
732 » return s; | 699 return c - 'a' + 10; |
733 } | 700 return -1; |
734 | 701 } |
735 static int hexval(unsigned c) | 702 |
736 { | 703 static reg_errcode_t marksub(tre_parse_ctx_t* ctx, |
737 » if (c-'0'<10) return c-'0'; | 704 tre_ast_node_t* node, |
738 » c |= 32; | 705 int subid) { |
739 » if (c-'a'<6) return c-'a'+10; | 706 if (node->submatch_id >= 0) { |
740 » return -1; | 707 tre_ast_node_t* n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); |
741 } | 708 if (!n) |
742 | 709 return REG_ESPACE; |
743 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int sub
id) | 710 n = tre_ast_new_catenation(ctx->mem, n, node); |
744 { | 711 if (!n) |
745 » if (node->submatch_id >= 0) { | 712 return REG_ESPACE; |
746 » » tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1)
; | 713 n->num_submatches = node->num_submatches; |
747 » » if (!n) | 714 node = n; |
748 » » » return REG_ESPACE; | 715 } |
749 » » n = tre_ast_new_catenation(ctx->mem, n, node); | 716 node->submatch_id = subid; |
750 » » if (!n) | 717 node->num_submatches++; |
751 » » » return REG_ESPACE; | 718 ctx->n = node; |
752 » » n->num_submatches = node->num_submatches; | 719 return REG_OK; |
753 » » node = n; | |
754 » } | |
755 » node->submatch_id = subid; | |
756 » node->num_submatches++; | |
757 » ctx->n = node; | |
758 » return REG_OK; | |
759 } | 720 } |
760 | 721 |
761 /* | 722 /* |
762 BRE grammar: | 723 BRE grammar: |
763 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^'
Branch '$' | 724 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' |
| 725 Branch '$' |
764 Branch = Atom | Branch Atom | 726 Branch = Atom | Branch Atom |
765 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch
'\)' | back_ref | 727 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch |
766 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count
'\}' | 728 '\)' | back_ref |
| 729 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count |
| 730 '\}' |
767 | 731 |
768 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well) | 732 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well) |
769 | 733 |
770 ERE grammar: | 734 ERE grammar: |
771 Regex = Branch | Regex '|' Branch | 735 Regex = Branch | Regex '|' Branch |
772 Branch = Atom | Branch Atom | 736 Branch = Atom | Branch Atom |
773 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')
' | '^' | '$' | 737 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex |
774 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count
',' Count '}' | 738 ')' | '^' | '$' |
| 739 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count |
| 740 ',' Count '}' |
775 | 741 |
776 (a*+?, ^*, $+, \X, {, (|a) are unspecified) | 742 (a*+?, ^*, $+, \X, {, (|a) are unspecified) |
777 */ | 743 */ |
778 | 744 |
779 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s) | 745 static reg_errcode_t parse_atom(tre_parse_ctx_t* ctx, const char* s) { |
780 { | 746 int len, ere = ctx->cflags & REG_EXTENDED; |
781 » int len, ere = ctx->cflags & REG_EXTENDED; | 747 const char* p; |
782 » const char *p; | 748 tre_ast_node_t* node; |
783 » tre_ast_node_t *node; | 749 wchar_t wc; |
784 » wchar_t wc; | 750 switch (*s) { |
785 » switch (*s) { | 751 case '[': |
786 » case '[': | 752 return parse_bracket(ctx, s + 1); |
787 » » return parse_bracket(ctx, s+1); | 753 case '\\': |
788 » case '\\': | 754 p = tre_expand_macro(s + 1); |
789 » » p = tre_expand_macro(s+1); | 755 if (p) { |
790 » » if (p) { | 756 /* assume \X expansion is a single atom */ |
791 » » » /* assume \X expansion is a single atom */ | 757 reg_errcode_t err = parse_atom(ctx, p); |
792 » » » reg_errcode_t err = parse_atom(ctx, p); | 758 ctx->s = s + 2; |
793 » » » ctx->s = s+2; | 759 return err; |
794 » » » return err; | 760 } |
795 » » } | 761 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */ |
796 » » /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */ | 762 switch (*++s) { |
797 » » switch (*++s) { | 763 case 0: |
798 » » case 0: | 764 return REG_EESCAPE; |
799 » » » return REG_EESCAPE; | 765 case 'b': |
800 » » case 'b': | 766 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1); |
801 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A
T_WB, -1); | 767 break; |
802 » » » break; | 768 case 'B': |
803 » » case 'B': | 769 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1); |
804 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A
T_WB_NEG, -1); | 770 break; |
805 » » » break; | 771 case '<': |
806 » » case '<': | 772 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1); |
807 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A
T_BOW, -1); | 773 break; |
808 » » » break; | 774 case '>': |
809 » » case '>': | 775 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1); |
810 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A
T_EOW, -1); | 776 break; |
811 » » » break; | 777 case 'x': |
812 » » case 'x': | 778 s++; |
813 » » » s++; | 779 int i, v = 0, c; |
814 » » » int i, v = 0, c; | 780 len = 2; |
815 » » » len = 2; | 781 if (*s == '{') { |
816 » » » if (*s == '{') { | 782 len = 8; |
817 » » » » len = 8; | 783 s++; |
818 » » » » s++; | 784 } |
819 » » » } | 785 for (i = 0; i < len && v < 0x110000; i++) { |
820 » » » for (i=0; i<len && v<0x110000; i++) { | 786 c = hexval(s[i]); |
821 » » » » c = hexval(s[i]); | 787 if (c < 0) |
822 » » » » if (c < 0) break; | 788 break; |
823 » » » » v = 16*v + c; | 789 v = 16 * v + c; |
824 » » » } | 790 } |
825 » » » s += i; | 791 s += i; |
826 » » » if (len == 8) { | 792 if (len == 8) { |
827 » » » » if (*s != '}') | 793 if (*s != '}') |
828 » » » » » return REG_EBRACE; | 794 return REG_EBRACE; |
829 » » » » s++; | 795 s++; |
830 » » » } | 796 } |
831 » » » node = tre_ast_new_literal(ctx->mem, v, v, ctx->position
++); | 797 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++); |
832 » » » s--; | 798 s--; |
833 » » » break; | 799 break; |
834 » » case '{': | 800 case '{': |
835 » » case '+': | 801 case '+': |
836 » » case '?': | 802 case '?': |
837 » » » /* extension: treat \+, \? as repetitions in BRE */ | 803 /* extension: treat \+, \? as repetitions in BRE */ |
838 » » » /* reject repetitions after empty expression in BRE */ | 804 /* reject repetitions after empty expression in BRE */ |
839 » » » if (!ere) | 805 if (!ere) |
840 » » » » return REG_BADRPT; | 806 return REG_BADRPT; |
841 » » case '|': | 807 case '|': |
842 » » » /* extension: treat \| as alternation in BRE */ | 808 /* extension: treat \| as alternation in BRE */ |
843 » » » if (!ere) { | 809 if (!ere) { |
844 » » » » node = tre_ast_new_literal(ctx->mem, EMPTY, -1,
-1); | 810 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); |
845 » » » » s--; | 811 s--; |
846 » » » » goto end; | 812 goto end; |
847 » » » } | 813 } |
848 » » » /* fallthrough */ | 814 /* fallthrough */ |
849 » » default: | 815 default: |
850 » » » if (!ere && (unsigned)*s-'1' < 9) { | 816 if (!ere && (unsigned)*s - '1' < 9) { |
851 » » » » /* back reference */ | 817 /* back reference */ |
852 » » » » int val = *s - '0'; | 818 int val = *s - '0'; |
853 » » » » node = tre_ast_new_literal(ctx->mem, BACKREF, va
l, ctx->position++); | 819 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++); |
854 » » » » ctx->max_backref = MAX(val, ctx->max_backref); | 820 ctx->max_backref = MAX(val, ctx->max_backref); |
855 » » » } else { | 821 } else { |
856 » » » » /* extension: accept unknown escaped char | 822 /* extension: accept unknown escaped char |
857 » » » » as a literal */ | 823 as a literal */ |
858 » » » » goto parse_literal; | 824 goto parse_literal; |
859 » » » } | 825 } |
860 » » } | 826 } |
861 » » s++; | 827 s++; |
862 » » break; | 828 break; |
863 » case '.': | 829 case '.': |
864 » » if (ctx->cflags & REG_NEWLINE) { | 830 if (ctx->cflags & REG_NEWLINE) { |
865 » » » tre_ast_node_t *tmp1, *tmp2; | 831 tre_ast_node_t *tmp1, *tmp2; |
866 » » » tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos
ition++); | 832 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n' - 1, ctx->position++); |
867 » » » tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA
X, ctx->position++); | 833 tmp2 = tre_ast_new_literal(ctx->mem, '\n' + 1, TRE_CHAR_MAX, |
868 » » » if (tmp1 && tmp2) | 834 ctx->position++); |
869 » » » » node = tre_ast_new_union(ctx->mem, tmp1, tmp2); | 835 if (tmp1 && tmp2) |
870 » » » else | 836 node = tre_ast_new_union(ctx->mem, tmp1, tmp2); |
871 » » » » node = 0; | 837 else |
872 » » } else { | 838 node = 0; |
873 » » » node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ct
x->position++); | 839 } else { |
874 » » } | 840 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++); |
875 » » s++; | 841 } |
876 » » break; | 842 s++; |
877 » case '^': | 843 break; |
878 » » /* '^' has a special meaning everywhere in EREs, and at beginnin
g of BRE. */ | 844 case '^': |
879 » » if (!ere && s != ctx->re) | 845 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. |
880 » » » goto parse_literal; | 846 */ |
881 » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -
1); | 847 if (!ere && s != ctx->re) |
882 » » s++; | 848 goto parse_literal; |
883 » » break; | 849 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1); |
884 » case '$': | 850 s++; |
885 » » /* '$' is special everywhere in EREs, and in the end of the stri
ng in BREs. */ | 851 break; |
886 » » if (!ere && s[1]) | 852 case '$': |
887 » » » goto parse_literal; | 853 /* '$' is special everywhere in EREs, and in the end of the string in |
888 » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -
1); | 854 * BREs. */ |
889 » » s++; | 855 if (!ere && s[1]) |
890 » » break; | 856 goto parse_literal; |
891 » case '*': | 857 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1); |
892 » » return REG_BADPAT; | 858 s++; |
893 » case '{': | 859 break; |
894 » case '+': | 860 case '*': |
895 » case '?': | 861 return REG_BADPAT; |
896 » » /* reject repetitions after empty expression in ERE */ | 862 case '{': |
897 » » if (ere) | 863 case '+': |
898 » » » return REG_BADRPT; | 864 case '?': |
899 » case '|': | 865 /* reject repetitions after empty expression in ERE */ |
900 » » if (!ere) | 866 if (ere) |
901 » » » goto parse_literal; | 867 return REG_BADRPT; |
902 » case 0: | 868 case '|': |
903 » » node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); | 869 if (!ere) |
904 » » break; | 870 goto parse_literal; |
905 » default: | 871 case 0: |
906 parse_literal: | 872 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); |
907 » » len = mbtowc(&wc, s, -1); | 873 break; |
908 » » if (len < 0) | 874 default: |
909 » » » return REG_BADPAT; | 875 parse_literal: |
910 » » if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(w
c))) { | 876 len = mbtowc(&wc, s, -1); |
911 » » » tre_ast_node_t *tmp1, *tmp2; | 877 if (len < 0) |
912 » » » /* multiple opposite case characters are not supported *
/ | 878 return REG_BADPAT; |
913 » » » tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr
e_toupper(wc), ctx->position); | 879 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) { |
914 » » » tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr
e_tolower(wc), ctx->position); | 880 tre_ast_node_t *tmp1, *tmp2; |
915 » » » if (tmp1 && tmp2) | 881 /* multiple opposite case characters are not supported */ |
916 » » » » node = tre_ast_new_union(ctx->mem, tmp1, tmp2); | 882 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), |
917 » » » else | 883 ctx->position); |
918 » » » » node = 0; | 884 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), |
919 » » } else { | 885 ctx->position); |
920 » » » node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi
on); | 886 if (tmp1 && tmp2) |
921 » » } | 887 node = tre_ast_new_union(ctx->mem, tmp1, tmp2); |
922 » » ctx->position++; | 888 else |
923 » » s += len; | 889 node = 0; |
924 » » break; | 890 } else { |
925 » } | 891 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position); |
| 892 } |
| 893 ctx->position++; |
| 894 s += len; |
| 895 break; |
| 896 } |
926 end: | 897 end: |
927 » if (!node) | 898 if (!node) |
928 » » return REG_ESPACE; | 899 return REG_ESPACE; |
929 » ctx->n = node; | 900 ctx->n = node; |
930 » ctx->s = s; | 901 ctx->s = s; |
931 » return REG_OK; | 902 return REG_OK; |
932 } | 903 } |
933 | 904 |
934 #define PUSHPTR(err, s, v) do { \ | 905 #define PUSHPTR(err, s, v) \ |
935 » if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \ | 906 do { \ |
936 » » return err; \ | 907 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \ |
937 } while(0) | 908 return err; \ |
938 | 909 } while (0) |
939 #define PUSHINT(err, s, v) do { \ | 910 |
940 » if ((err = tre_stack_push_int(s, v)) != REG_OK) \ | 911 #define PUSHINT(err, s, v) \ |
941 » » return err; \ | 912 do { \ |
942 } while(0) | 913 if ((err = tre_stack_push_int(s, v)) != REG_OK) \ |
943 | 914 return err; \ |
944 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) | 915 } while (0) |
945 { | 916 |
946 » tre_ast_node_t *nbranch=0, *nunion=0; | 917 static reg_errcode_t tre_parse(tre_parse_ctx_t* ctx) { |
947 » int ere = ctx->cflags & REG_EXTENDED; | 918 tre_ast_node_t *nbranch = 0, *nunion = 0; |
948 » const char *s = ctx->re; | 919 int ere = ctx->cflags & REG_EXTENDED; |
949 » int subid = 0; | 920 const char* s = ctx->re; |
950 » int depth = 0; | 921 int subid = 0; |
951 » reg_errcode_t err; | 922 int depth = 0; |
952 » tre_stack_t *stack = ctx->stack; | 923 reg_errcode_t err; |
953 | 924 tre_stack_t* stack = ctx->stack; |
954 » PUSHINT(err, stack, subid++); | 925 |
955 » for (;;) { | 926 PUSHINT(err, stack, subid++); |
956 » » if ((!ere && *s == '\\' && s[1] == '(') || | 927 for (;;) { |
957 » » (ere && *s == '(')) { | 928 if ((!ere && *s == '\\' && s[1] == '(') || (ere && *s == '(')) { |
958 » » » PUSHPTR(err, stack, nunion); | 929 PUSHPTR(err, stack, nunion); |
959 » » » PUSHPTR(err, stack, nbranch); | 930 PUSHPTR(err, stack, nbranch); |
960 » » » PUSHINT(err, stack, subid++); | 931 PUSHINT(err, stack, subid++); |
961 » » » s++; | 932 s++; |
962 » » » if (!ere) | 933 if (!ere) |
963 » » » » s++; | 934 s++; |
964 » » » depth++; | 935 depth++; |
965 » » » nbranch = nunion = 0; | 936 nbranch = nunion = 0; |
966 » » » continue; | 937 continue; |
967 » » } | 938 } |
968 » » if ((!ere && *s == '\\' && s[1] == ')') || | 939 if ((!ere && *s == '\\' && s[1] == ')') || (ere && *s == ')' && depth)) { |
969 » » (ere && *s == ')' && depth)) { | 940 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); |
970 » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); | 941 if (!ctx->n) |
971 » » » if (!ctx->n) | 942 return REG_ESPACE; |
972 » » » » return REG_ESPACE; | 943 } else { |
973 » » } else { | 944 err = parse_atom(ctx, s); |
974 » » » err = parse_atom(ctx, s); | 945 if (err != REG_OK) |
975 » » » if (err != REG_OK) | 946 return err; |
976 » » » » return err; | 947 s = ctx->s; |
977 » » » s = ctx->s; | 948 } |
978 » » } | 949 |
979 | 950 parse_iter: |
980 » parse_iter: | 951 /* extension: repetitions are rejected after an empty node |
981 » » /* extension: repetitions are rejected after an empty node | 952 eg. (+), |*, {2}, but assertions are not treated as empty |
982 » » eg. (+), |*, {2}, but assertions are not treated as empty | 953 so ^* or $? are accepted currently. */ |
983 » » so ^* or $? are accepted currently. */ | 954 for (;;) { |
984 » » for (;;) { | 955 int min, max; |
985 » » » int min, max; | 956 |
986 | 957 if (*s != '\\' && *s != '*') { |
987 » » » if (*s!='\\' && *s!='*') { | 958 if (!ere) |
988 » » » » if (!ere) | 959 break; |
989 » » » » » break; | 960 if (*s != '+' && *s != '?' && *s != '{') |
990 » » » » if (*s!='+' && *s!='?' && *s!='{') | 961 break; |
991 » » » » » break; | 962 } |
992 » » » } | 963 if (*s == '\\' && ere) |
993 » » » if (*s=='\\' && ere) | 964 break; |
994 » » » » break; | 965 /* extension: treat \+, \? as repetitions in BRE */ |
995 » » » /* extension: treat \+, \? as repetitions in BRE */ | 966 if (*s == '\\' && s[1] != '+' && s[1] != '?' && s[1] != '{') |
996 » » » if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{') | 967 break; |
997 » » » » break; | 968 if (*s == '\\') |
998 » » » if (*s=='\\') | 969 s++; |
999 » » » » s++; | 970 |
1000 | 971 /* extension: multiple consecutive *+?{,} is unspecified, |
1001 » » » /* extension: multiple consecutive *+?{,} is unspecified
, | 972 but (a+)+ has to be supported so accepting a++ makes |
1002 » » » but (a+)+ has to be supported so accepting a++ makes | 973 sense, note however that the RE_DUP_MAX limit can be |
1003 » » » sense, note however that the RE_DUP_MAX limit can be | 974 circumvented: (a{255}){255} uses a lot of memory.. */ |
1004 » » » circumvented: (a{255}){255} uses a lot of memory.. */ | 975 if (*s == '{') { |
1005 » » » if (*s=='{') { | 976 s = parse_dup(s + 1, ere, &min, &max); |
1006 » » » » s = parse_dup(s+1, ere, &min, &max); | 977 if (!s) |
1007 » » » » if (!s) | 978 return REG_BADBR; |
1008 » » » » » return REG_BADBR; | 979 } else { |
1009 » » » } else { | 980 min = 0; |
1010 » » » » min=0; | 981 max = -1; |
1011 » » » » max=-1; | 982 if (*s == '+') |
1012 » » » » if (*s == '+') | 983 min = 1; |
1013 » » » » » min = 1; | 984 if (*s == '?') |
1014 » » » » if (*s == '?') | 985 max = 1; |
1015 » » » » » max = 1; | 986 s++; |
1016 » » » » s++; | 987 } |
1017 » » » } | 988 if (max == 0) |
1018 » » » if (max == 0) | 989 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); |
1019 » » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1
, -1); | 990 else |
1020 » » » else | 991 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0); |
1021 » » » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min,
max, 0); | 992 if (!ctx->n) |
1022 » » » if (!ctx->n) | 993 return REG_ESPACE; |
1023 » » » » return REG_ESPACE; | 994 } |
1024 » » } | 995 |
1025 | 996 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); |
1026 » » nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); | 997 if ((ere && *s == '|') || (ere && *s == ')' && depth) || |
1027 » » if ((ere && *s == '|') || | 998 (!ere && *s == '\\' && s[1] == ')') || |
1028 » » (ere && *s == ')' && depth) || | 999 /* extension: treat \| as alternation in BRE */ |
1029 » » (!ere && *s == '\\' && s[1] == ')') || | 1000 (!ere && *s == '\\' && s[1] == '|') || !*s) { |
1030 » » /* extension: treat \| as alternation in BRE */ | 1001 /* extension: empty branch is unspecified (), (|a), (a|) |
1031 » » (!ere && *s == '\\' && s[1] == '|') || | 1002 here they are not rejected but match on empty string */ |
1032 » » !*s) { | 1003 int c = *s; |
1033 » » » /* extension: empty branch is unspecified (), (|a), (a|) | 1004 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch); |
1034 » » » here they are not rejected but match on empty string
*/ | 1005 nbranch = 0; |
1035 » » » int c = *s; | 1006 |
1036 » » » nunion = tre_ast_new_union(ctx->mem, nunion, nbranch); | 1007 if (c == '\\' && s[1] == '|') { |
1037 » » » nbranch = 0; | 1008 s += 2; |
1038 | 1009 } else if (c == '|') { |
1039 » » » if (c == '\\' && s[1] == '|') { | 1010 s++; |
1040 » » » » s+=2; | 1011 } else { |
1041 » » » } else if (c == '|') { | 1012 if (c == '\\') { |
1042 » » » » s++; | 1013 if (!depth) |
1043 » » » } else { | 1014 return REG_EPAREN; |
1044 » » » » if (c == '\\') { | 1015 s += 2; |
1045 » » » » » if (!depth) return REG_EPAREN; | 1016 } else if (c == ')') |
1046 » » » » » s+=2; | 1017 s++; |
1047 » » » » } else if (c == ')') | 1018 depth--; |
1048 » » » » » s++; | 1019 err = marksub(ctx, nunion, tre_stack_pop_int(stack)); |
1049 » » » » depth--; | 1020 if (err != REG_OK) |
1050 » » » » err = marksub(ctx, nunion, tre_stack_pop_int(sta
ck)); | 1021 return err; |
1051 » » » » if (err != REG_OK) | 1022 if (!c && depth < 0) { |
1052 » » » » » return err; | 1023 ctx->submatch_id = subid; |
1053 » » » » if (!c && depth<0) { | 1024 return REG_OK; |
1054 » » » » » ctx->submatch_id = subid; | 1025 } |
1055 » » » » » return REG_OK; | 1026 if (!c || depth < 0) |
1056 » » » » } | 1027 return REG_EPAREN; |
1057 » » » » if (!c || depth<0) | 1028 nbranch = tre_stack_pop_voidptr(stack); |
1058 » » » » » return REG_EPAREN; | 1029 nunion = tre_stack_pop_voidptr(stack); |
1059 » » » » nbranch = tre_stack_pop_voidptr(stack); | 1030 goto parse_iter; |
1060 » » » » nunion = tre_stack_pop_voidptr(stack); | 1031 } |
1061 » » » » goto parse_iter; | 1032 } |
1062 » » » } | 1033 } |
1063 » » } | 1034 } |
1064 » } | |
1065 } | |
1066 | |
1067 | 1035 |
1068 /*********************************************************************** | 1036 /*********************************************************************** |
1069 from tre-compile.c | 1037 from tre-compile.c |
1070 ***********************************************************************/ | 1038 ***********************************************************************/ |
1071 | 1039 |
1072 | |
1073 /* | 1040 /* |
1074 TODO: | 1041 TODO: |
1075 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive | 1042 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive |
1076 function calls. | 1043 function calls. |
1077 */ | 1044 */ |
1078 | 1045 |
1079 /* | 1046 /* |
1080 Algorithms to setup tags so that submatch addressing can be done. | 1047 Algorithms to setup tags so that submatch addressing can be done. |
1081 */ | 1048 */ |
1082 | 1049 |
1083 | |
1084 /* Inserts a catenation node to the root of the tree given in `node'. | 1050 /* Inserts a catenation node to the root of the tree given in `node'. |
1085 As the left child a new tag with number `tag_id' to `node' is added, | 1051 As the left child a new tag with number `tag_id' to `node' is added, |
1086 and the right child is the old root. */ | 1052 and the right child is the old root. */ |
1087 static reg_errcode_t | 1053 static reg_errcode_t tre_add_tag_left(tre_mem_t mem, |
1088 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id) | 1054 tre_ast_node_t* node, |
1089 { | 1055 int tag_id) { |
1090 tre_catenation_t *c; | 1056 tre_catenation_t* c; |
1091 | 1057 |
1092 c = tre_mem_alloc(mem, sizeof(*c)); | 1058 c = tre_mem_alloc(mem, sizeof(*c)); |
1093 if (c == NULL) | 1059 if (c == NULL) |
1094 return REG_ESPACE; | 1060 return REG_ESPACE; |
1095 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); | 1061 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); |
1096 if (c->left == NULL) | 1062 if (c->left == NULL) |
1097 return REG_ESPACE; | 1063 return REG_ESPACE; |
1098 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); | 1064 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); |
1099 if (c->right == NULL) | 1065 if (c->right == NULL) |
1100 return REG_ESPACE; | 1066 return REG_ESPACE; |
1101 | 1067 |
1102 c->right->obj = node->obj; | 1068 c->right->obj = node->obj; |
1103 c->right->type = node->type; | 1069 c->right->type = node->type; |
1104 c->right->nullable = -1; | 1070 c->right->nullable = -1; |
1105 c->right->submatch_id = -1; | 1071 c->right->submatch_id = -1; |
1106 c->right->firstpos = NULL; | 1072 c->right->firstpos = NULL; |
1107 c->right->lastpos = NULL; | 1073 c->right->lastpos = NULL; |
1108 c->right->num_tags = 0; | 1074 c->right->num_tags = 0; |
1109 node->obj = c; | 1075 node->obj = c; |
1110 node->type = CATENATION; | 1076 node->type = CATENATION; |
1111 return REG_OK; | 1077 return REG_OK; |
1112 } | 1078 } |
1113 | 1079 |
1114 /* Inserts a catenation node to the root of the tree given in `node'. | 1080 /* Inserts a catenation node to the root of the tree given in `node'. |
1115 As the right child a new tag with number `tag_id' to `node' is added, | 1081 As the right child a new tag with number `tag_id' to `node' is added, |
1116 and the left child is the old root. */ | 1082 and the left child is the old root. */ |
1117 static reg_errcode_t | 1083 static reg_errcode_t tre_add_tag_right(tre_mem_t mem, |
1118 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id) | 1084 tre_ast_node_t* node, |
1119 { | 1085 int tag_id) { |
1120 tre_catenation_t *c; | 1086 tre_catenation_t* c; |
1121 | 1087 |
1122 c = tre_mem_alloc(mem, sizeof(*c)); | 1088 c = tre_mem_alloc(mem, sizeof(*c)); |
1123 if (c == NULL) | 1089 if (c == NULL) |
1124 return REG_ESPACE; | 1090 return REG_ESPACE; |
1125 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1); | 1091 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1); |
1126 if (c->right == NULL) | 1092 if (c->right == NULL) |
1127 return REG_ESPACE; | 1093 return REG_ESPACE; |
1128 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); | 1094 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); |
1129 if (c->left == NULL) | 1095 if (c->left == NULL) |
1130 return REG_ESPACE; | 1096 return REG_ESPACE; |
(...skipping 13 matching lines...) Expand all Loading... |
1144 typedef enum { | 1110 typedef enum { |
1145 ADDTAGS_RECURSE, | 1111 ADDTAGS_RECURSE, |
1146 ADDTAGS_AFTER_ITERATION, | 1112 ADDTAGS_AFTER_ITERATION, |
1147 ADDTAGS_AFTER_UNION_LEFT, | 1113 ADDTAGS_AFTER_UNION_LEFT, |
1148 ADDTAGS_AFTER_UNION_RIGHT, | 1114 ADDTAGS_AFTER_UNION_RIGHT, |
1149 ADDTAGS_AFTER_CAT_LEFT, | 1115 ADDTAGS_AFTER_CAT_LEFT, |
1150 ADDTAGS_AFTER_CAT_RIGHT, | 1116 ADDTAGS_AFTER_CAT_RIGHT, |
1151 ADDTAGS_SET_SUBMATCH_END | 1117 ADDTAGS_SET_SUBMATCH_END |
1152 } tre_addtags_symbol_t; | 1118 } tre_addtags_symbol_t; |
1153 | 1119 |
1154 | |
1155 typedef struct { | 1120 typedef struct { |
1156 int tag; | 1121 int tag; |
1157 int next_tag; | 1122 int next_tag; |
1158 } tre_tag_states_t; | 1123 } tre_tag_states_t; |
1159 | 1124 |
1160 | |
1161 /* Go through `regset' and set submatch data for submatches that are | 1125 /* Go through `regset' and set submatch data for submatches that are |
1162 using this tag. */ | 1126 using this tag. */ |
1163 static void | 1127 static void tre_purge_regset(int* regset, tre_tnfa_t* tnfa, int tag) { |
1164 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag) | |
1165 { | |
1166 int i; | 1128 int i; |
1167 | 1129 |
1168 for (i = 0; regset[i] >= 0; i++) | 1130 for (i = 0; regset[i] >= 0; i++) { |
1169 { | 1131 int id = regset[i] / 2; |
1170 int id = regset[i] / 2; | 1132 int start = !(regset[i] % 2); |
1171 int start = !(regset[i] % 2); | 1133 if (start) |
1172 if (start) | 1134 tnfa->submatch_data[id].so_tag = tag; |
1173 » tnfa->submatch_data[id].so_tag = tag; | 1135 else |
1174 else | 1136 tnfa->submatch_data[id].eo_tag = tag; |
1175 » tnfa->submatch_data[id].eo_tag = tag; | 1137 } |
1176 } | |
1177 regset[0] = -1; | 1138 regset[0] = -1; |
1178 } | 1139 } |
1179 | 1140 |
1180 | |
1181 /* Adds tags to appropriate locations in the parse tree in `tree', so that | 1141 /* Adds tags to appropriate locations in the parse tree in `tree', so that |
1182 subexpressions marked for submatch addressing can be traced. */ | 1142 subexpressions marked for submatch addressing can be traced. */ |
1183 static reg_errcode_t | 1143 static reg_errcode_t tre_add_tags(tre_mem_t mem, |
1184 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, | 1144 tre_stack_t* stack, |
1185 » tre_tnfa_t *tnfa) | 1145 tre_ast_node_t* tree, |
1186 { | 1146 tre_tnfa_t* tnfa) { |
1187 reg_errcode_t status = REG_OK; | 1147 reg_errcode_t status = REG_OK; |
1188 tre_addtags_symbol_t symbol; | 1148 tre_addtags_symbol_t symbol; |
1189 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */ | 1149 tre_ast_node_t* node = tree; /* Tree node we are currently looking at. */ |
1190 int bottom = tre_stack_num_objects(stack); | 1150 int bottom = tre_stack_num_objects(stack); |
1191 /* True for first pass (counting number of needed tags) */ | 1151 /* True for first pass (counting number of needed tags) */ |
1192 int first_pass = (mem == NULL || tnfa == NULL); | 1152 int first_pass = (mem == NULL || tnfa == NULL); |
1193 int *regset, *orig_regset; | 1153 int *regset, *orig_regset; |
1194 int num_tags = 0; /* Total number of tags. */ | 1154 int num_tags = 0; /* Total number of tags. */ |
1195 int num_minimals = 0;» /* Number of special minimal tags. */ | 1155 int num_minimals = 0; /* Number of special minimal tags. */ |
1196 int tag = 0;» /* The tag that is to be added next. */ | 1156 int tag = 0; /* The tag that is to be added next. */ |
1197 int next_tag = 1; /* Next tag to use after this one. */ | 1157 int next_tag = 1; /* Next tag to use after this one. */ |
1198 int *parents;» /* Stack of submatches the current submatch is | 1158 int* parents; /* Stack of submatches the current submatch is |
1199 » » contained in. */ | 1159 contained in. */ |
1200 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ | 1160 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ |
1201 tre_tag_states_t *saved_states; | 1161 tre_tag_states_t* saved_states; |
1202 | 1162 |
1203 tre_tag_direction_t direction = TRE_TAG_MINIMIZE; | 1163 tre_tag_direction_t direction = TRE_TAG_MINIMIZE; |
1204 if (!first_pass) | 1164 if (!first_pass) { |
1205 { | 1165 tnfa->end_tag = 0; |
1206 tnfa->end_tag = 0; | 1166 tnfa->minimal_tags[0] = -1; |
1207 tnfa->minimal_tags[0] = -1; | 1167 } |
1208 } | |
1209 | 1168 |
1210 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); | 1169 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); |
1211 if (regset == NULL) | 1170 if (regset == NULL) |
1212 return REG_ESPACE; | 1171 return REG_ESPACE; |
1213 regset[0] = -1; | 1172 regset[0] = -1; |
1214 orig_regset = regset; | 1173 orig_regset = regset; |
1215 | 1174 |
1216 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); | 1175 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); |
1217 if (parents == NULL) | 1176 if (parents == NULL) { |
1218 { | 1177 xfree(regset); |
1219 xfree(regset); | 1178 return REG_ESPACE; |
1220 return REG_ESPACE; | 1179 } |
1221 } | |
1222 parents[0] = -1; | 1180 parents[0] = -1; |
1223 | 1181 |
1224 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); | 1182 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); |
1225 if (saved_states == NULL) | 1183 if (saved_states == NULL) { |
1226 { | 1184 xfree(regset); |
1227 xfree(regset); | 1185 xfree(parents); |
1228 xfree(parents); | 1186 return REG_ESPACE; |
1229 return REG_ESPACE; | 1187 } else { |
1230 } | 1188 unsigned int i; |
1231 else | 1189 for (i = 0; i <= tnfa->num_submatches; i++) |
1232 { | 1190 saved_states[i].tag = -1; |
1233 unsigned int i; | 1191 } |
1234 for (i = 0; i <= tnfa->num_submatches; i++) | |
1235 » saved_states[i].tag = -1; | |
1236 } | |
1237 | 1192 |
1238 STACK_PUSH(stack, voidptr, node); | 1193 STACK_PUSH(stack, voidptr, node); |
1239 STACK_PUSH(stack, int, ADDTAGS_RECURSE); | 1194 STACK_PUSH(stack, int, ADDTAGS_RECURSE); |
1240 | 1195 |
1241 while (tre_stack_num_objects(stack) > bottom) | 1196 while (tre_stack_num_objects(stack) > bottom) { |
1242 { | 1197 if (status != REG_OK) |
1243 if (status != REG_OK) | 1198 break; |
1244 break; | 1199 |
1245 | 1200 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack); |
1246 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack); | 1201 switch (symbol) { |
1247 switch (symbol) | 1202 case ADDTAGS_SET_SUBMATCH_END: { |
1248 { | 1203 int id = tre_stack_pop_int(stack); |
1249 | 1204 int i; |
1250 case ADDTAGS_SET_SUBMATCH_END: | 1205 |
1251 { | 1206 /* Add end of this submatch to regset. */ |
1252 int id = tre_stack_pop_int(stack); | 1207 for (i = 0; regset[i] >= 0; i++) |
1253 int i; | 1208 ; |
1254 | 1209 regset[i] = id * 2 + 1; |
1255 /* Add end of this submatch to regset. */ | 1210 regset[i + 1] = -1; |
1256 for (i = 0; regset[i] >= 0; i++); | 1211 |
1257 regset[i] = id * 2 + 1; | 1212 /* Pop this submatch from the parents stack. */ |
1258 regset[i + 1] = -1; | 1213 for (i = 0; parents[i] >= 0; i++) |
1259 | 1214 ; |
1260 /* Pop this submatch from the parents stack. */ | 1215 parents[i - 1] = -1; |
1261 for (i = 0; parents[i] >= 0; i++); | 1216 break; |
1262 parents[i - 1] = -1; | 1217 } |
1263 break; | 1218 |
1264 } | 1219 case ADDTAGS_RECURSE: |
1265 | 1220 node = tre_stack_pop_voidptr(stack); |
1266 case ADDTAGS_RECURSE: | 1221 |
1267 node = tre_stack_pop_voidptr(stack); | 1222 if (node->submatch_id >= 0) { |
1268 | 1223 int id = node->submatch_id; |
1269 if (node->submatch_id >= 0) | 1224 int i; |
1270 { | 1225 |
1271 int id = node->submatch_id; | 1226 /* Add start of this submatch to regset. */ |
1272 int i; | 1227 for (i = 0; regset[i] >= 0; i++) |
1273 | 1228 ; |
1274 | 1229 regset[i] = id * 2; |
1275 /* Add start of this submatch to regset. */ | 1230 regset[i + 1] = -1; |
1276 for (i = 0; regset[i] >= 0; i++); | 1231 |
1277 regset[i] = id * 2; | 1232 if (!first_pass) { |
1278 regset[i + 1] = -1; | 1233 for (i = 0; parents[i] >= 0; i++) |
1279 | 1234 ; |
1280 if (!first_pass) | 1235 tnfa->submatch_data[id].parents = NULL; |
1281 { | 1236 if (i > 0) { |
1282 for (i = 0; parents[i] >= 0; i++); | 1237 int* p = xmalloc(sizeof(*p) * (i + 1)); |
1283 tnfa->submatch_data[id].parents = NULL; | 1238 if (p == NULL) { |
1284 if (i > 0) | 1239 status = REG_ESPACE; |
1285 { | 1240 break; |
1286 int *p = xmalloc(sizeof(*p) * (i + 1)); | 1241 } |
1287 if (p == NULL) | 1242 assert(tnfa->submatch_data[id].parents == NULL); |
1288 { | 1243 tnfa->submatch_data[id].parents = p; |
1289 status = REG_ESPACE; | 1244 for (i = 0; parents[i] >= 0; i++) |
1290 break; | 1245 p[i] = parents[i]; |
1291 } | 1246 p[i] = -1; |
1292 assert(tnfa->submatch_data[id].parents == NULL); | 1247 } |
1293 tnfa->submatch_data[id].parents = p; | 1248 } |
1294 for (i = 0; parents[i] >= 0; i++) | 1249 |
1295 p[i] = parents[i]; | 1250 /* Add end of this submatch to regset after processing this |
1296 p[i] = -1; | 1251 node. */ |
1297 } | 1252 STACK_PUSHX(stack, int, node->submatch_id); |
1298 } | 1253 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); |
1299 | 1254 } |
1300 /* Add end of this submatch to regset after processing this | 1255 |
1301 node. */ | 1256 switch (node->type) { |
1302 STACK_PUSHX(stack, int, node->submatch_id); | 1257 case LITERAL: { |
1303 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); | 1258 tre_literal_t* lit = node->obj; |
1304 } | 1259 |
1305 | 1260 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) { |
1306 switch (node->type) | 1261 int i; |
1307 { | 1262 if (regset[0] >= 0) { |
1308 case LITERAL: | 1263 /* Regset is not empty, so add a tag before the |
1309 { | 1264 literal or backref. */ |
1310 tre_literal_t *lit = node->obj; | 1265 if (!first_pass) { |
1311 | 1266 status = tre_add_tag_left(mem, node, tag); |
1312 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) | 1267 tnfa->tag_directions[tag] = direction; |
1313 { | 1268 if (minimal_tag >= 0) { |
1314 int i; | 1269 for (i = 0; tnfa->minimal_tags[i] >= 0; i++) |
1315 if (regset[0] >= 0) | 1270 ; |
1316 { | 1271 tnfa->minimal_tags[i] = tag; |
1317 /* Regset is not empty, so add a tag before the | 1272 tnfa->minimal_tags[i + 1] = minimal_tag; |
1318 literal or backref. */ | 1273 tnfa->minimal_tags[i + 2] = -1; |
1319 if (!first_pass) | 1274 minimal_tag = -1; |
1320 { | 1275 num_minimals++; |
1321 status = tre_add_tag_left(mem, node, tag); | 1276 } |
1322 tnfa->tag_directions[tag] = direction; | 1277 tre_purge_regset(regset, tnfa, tag); |
1323 if (minimal_tag >= 0) | 1278 } else { |
1324 { | 1279 node->num_tags = 1; |
1325 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); | 1280 } |
1326 tnfa->minimal_tags[i] = tag; | 1281 |
1327 tnfa->minimal_tags[i + 1] = minimal_tag; | 1282 regset[0] = -1; |
1328 tnfa->minimal_tags[i + 2] = -1; | 1283 tag = next_tag; |
1329 minimal_tag = -1; | 1284 num_tags++; |
1330 num_minimals++; | 1285 next_tag++; |
1331 } | 1286 } |
1332 tre_purge_regset(regset, tnfa, tag); | 1287 } else { |
1333 } | 1288 assert(!IS_TAG(lit)); |
1334 else | 1289 } |
1335 { | 1290 break; |
1336 node->num_tags = 1; | 1291 } |
1337 } | 1292 case CATENATION: { |
1338 | 1293 tre_catenation_t* cat = node->obj; |
1339 regset[0] = -1; | 1294 tre_ast_node_t* left = cat->left; |
1340 tag = next_tag; | 1295 tre_ast_node_t* right = cat->right; |
1341 num_tags++; | 1296 int reserved_tag = -1; |
1342 next_tag++; | 1297 |
1343 } | 1298 /* After processing right child. */ |
1344 } | 1299 STACK_PUSHX(stack, voidptr, node); |
1345 else | 1300 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); |
1346 { | 1301 |
1347 assert(!IS_TAG(lit)); | 1302 /* Process right child. */ |
1348 } | 1303 STACK_PUSHX(stack, voidptr, right); |
1349 break; | 1304 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); |
1350 } | 1305 |
1351 case CATENATION: | 1306 /* After processing left child. */ |
1352 { | 1307 STACK_PUSHX(stack, int, next_tag + left->num_tags); |
1353 tre_catenation_t *cat = node->obj; | 1308 if (left->num_tags > 0 && right->num_tags > 0) { |
1354 tre_ast_node_t *left = cat->left; | 1309 /* Reserve the next tag to the right child. */ |
1355 tre_ast_node_t *right = cat->right; | 1310 reserved_tag = next_tag; |
1356 int reserved_tag = -1; | 1311 next_tag++; |
1357 | 1312 } |
1358 | 1313 STACK_PUSHX(stack, int, reserved_tag); |
1359 /* After processing right child. */ | 1314 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); |
1360 STACK_PUSHX(stack, voidptr, node); | 1315 |
1361 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); | 1316 /* Process left child. */ |
1362 | 1317 STACK_PUSHX(stack, voidptr, left); |
1363 /* Process right child. */ | 1318 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); |
1364 STACK_PUSHX(stack, voidptr, right); | 1319 |
1365 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); | 1320 } break; |
1366 | 1321 case ITERATION: { |
1367 /* After processing left child. */ | 1322 tre_iteration_t* iter = node->obj; |
1368 STACK_PUSHX(stack, int, next_tag + left->num_tags); | 1323 |
1369 if (left->num_tags > 0 && right->num_tags > 0) | 1324 if (first_pass) { |
1370 { | 1325 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); |
1371 /* Reserve the next tag to the right child. */ | 1326 } else { |
1372 reserved_tag = next_tag; | 1327 STACK_PUSHX(stack, int, tag); |
1373 next_tag++; | 1328 STACK_PUSHX(stack, int, iter->minimal); |
1374 } | 1329 } |
1375 STACK_PUSHX(stack, int, reserved_tag); | 1330 STACK_PUSHX(stack, voidptr, node); |
1376 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); | 1331 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); |
1377 | 1332 |
1378 /* Process left child. */ | 1333 STACK_PUSHX(stack, voidptr, iter->arg); |
1379 STACK_PUSHX(stack, voidptr, left); | 1334 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); |
1380 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); | 1335 |
1381 | 1336 /* Regset is not empty, so add a tag here. */ |
1382 } | 1337 if (regset[0] >= 0 || iter->minimal) { |
1383 break; | 1338 if (!first_pass) { |
1384 case ITERATION: | 1339 int i; |
1385 { | 1340 status = tre_add_tag_left(mem, node, tag); |
1386 tre_iteration_t *iter = node->obj; | 1341 if (iter->minimal) |
1387 | 1342 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; |
1388 if (first_pass) | 1343 else |
1389 { | 1344 tnfa->tag_directions[tag] = direction; |
1390 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); | 1345 if (minimal_tag >= 0) { |
1391 } | 1346 for (i = 0; tnfa->minimal_tags[i] >= 0; i++) |
1392 else | 1347 ; |
1393 { | 1348 tnfa->minimal_tags[i] = tag; |
1394 STACK_PUSHX(stack, int, tag); | 1349 tnfa->minimal_tags[i + 1] = minimal_tag; |
1395 STACK_PUSHX(stack, int, iter->minimal); | 1350 tnfa->minimal_tags[i + 2] = -1; |
1396 } | 1351 minimal_tag = -1; |
1397 STACK_PUSHX(stack, voidptr, node); | 1352 num_minimals++; |
1398 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); | 1353 } |
1399 | 1354 tre_purge_regset(regset, tnfa, tag); |
1400 STACK_PUSHX(stack, voidptr, iter->arg); | 1355 } |
1401 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); | 1356 |
1402 | 1357 regset[0] = -1; |
1403 /* Regset is not empty, so add a tag here. */ | 1358 tag = next_tag; |
1404 if (regset[0] >= 0 || iter->minimal) | 1359 num_tags++; |
1405 { | 1360 next_tag++; |
1406 if (!first_pass) | 1361 } |
1407 { | 1362 direction = TRE_TAG_MINIMIZE; |
1408 int i; | 1363 } break; |
1409 status = tre_add_tag_left(mem, node, tag); | 1364 case UNION: { |
1410 if (iter->minimal) | 1365 tre_union_t* uni = node->obj; |
1411 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; | 1366 tre_ast_node_t* left = uni->left; |
1412 else | 1367 tre_ast_node_t* right = uni->right; |
1413 tnfa->tag_directions[tag] = direction; | 1368 int left_tag; |
1414 if (minimal_tag >= 0) | 1369 int right_tag; |
1415 { | 1370 |
1416 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); | 1371 if (regset[0] >= 0) { |
1417 tnfa->minimal_tags[i] = tag; | 1372 left_tag = next_tag; |
1418 tnfa->minimal_tags[i + 1] = minimal_tag; | 1373 right_tag = next_tag + 1; |
1419 tnfa->minimal_tags[i + 2] = -1; | 1374 } else { |
1420 minimal_tag = -1; | 1375 left_tag = tag; |
1421 num_minimals++; | 1376 right_tag = next_tag; |
1422 } | 1377 } |
1423 tre_purge_regset(regset, tnfa, tag); | 1378 |
1424 } | 1379 /* After processing right child. */ |
1425 | 1380 STACK_PUSHX(stack, int, right_tag); |
1426 regset[0] = -1; | 1381 STACK_PUSHX(stack, int, left_tag); |
1427 tag = next_tag; | 1382 STACK_PUSHX(stack, voidptr, regset); |
1428 num_tags++; | 1383 STACK_PUSHX(stack, int, regset[0] >= 0); |
1429 next_tag++; | 1384 STACK_PUSHX(stack, voidptr, node); |
1430 } | 1385 STACK_PUSHX(stack, voidptr, right); |
1431 direction = TRE_TAG_MINIMIZE; | 1386 STACK_PUSHX(stack, voidptr, left); |
1432 } | 1387 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); |
1433 break; | 1388 |
1434 case UNION: | 1389 /* Process right child. */ |
1435 { | 1390 STACK_PUSHX(stack, voidptr, right); |
1436 tre_union_t *uni = node->obj; | 1391 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); |
1437 tre_ast_node_t *left = uni->left; | 1392 |
1438 tre_ast_node_t *right = uni->right; | 1393 /* After processing left child. */ |
1439 int left_tag; | 1394 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); |
1440 int right_tag; | 1395 |
1441 | 1396 /* Process left child. */ |
1442 if (regset[0] >= 0) | 1397 STACK_PUSHX(stack, voidptr, left); |
1443 { | 1398 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); |
1444 left_tag = next_tag; | 1399 |
1445 right_tag = next_tag + 1; | 1400 /* Regset is not empty, so add a tag here. */ |
1446 } | 1401 if (regset[0] >= 0) { |
1447 else | 1402 if (!first_pass) { |
1448 { | 1403 int i; |
1449 left_tag = tag; | 1404 status = tre_add_tag_left(mem, node, tag); |
1450 right_tag = next_tag; | 1405 tnfa->tag_directions[tag] = direction; |
1451 } | 1406 if (minimal_tag >= 0) { |
1452 | 1407 for (i = 0; tnfa->minimal_tags[i] >= 0; i++) |
1453 /* After processing right child. */ | 1408 ; |
1454 STACK_PUSHX(stack, int, right_tag); | 1409 tnfa->minimal_tags[i] = tag; |
1455 STACK_PUSHX(stack, int, left_tag); | 1410 tnfa->minimal_tags[i + 1] = minimal_tag; |
1456 STACK_PUSHX(stack, voidptr, regset); | 1411 tnfa->minimal_tags[i + 2] = -1; |
1457 STACK_PUSHX(stack, int, regset[0] >= 0); | 1412 minimal_tag = -1; |
1458 STACK_PUSHX(stack, voidptr, node); | 1413 num_minimals++; |
1459 STACK_PUSHX(stack, voidptr, right); | 1414 } |
1460 STACK_PUSHX(stack, voidptr, left); | 1415 tre_purge_regset(regset, tnfa, tag); |
1461 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); | 1416 } |
1462 | 1417 |
1463 /* Process right child. */ | 1418 regset[0] = -1; |
1464 STACK_PUSHX(stack, voidptr, right); | 1419 tag = next_tag; |
1465 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); | 1420 num_tags++; |
1466 | 1421 next_tag++; |
1467 /* After processing left child. */ | 1422 } |
1468 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); | 1423 |
1469 | 1424 if (node->num_submatches > 0) { |
1470 /* Process left child. */ | 1425 /* The next two tags are reserved for markers. */ |
1471 STACK_PUSHX(stack, voidptr, left); | 1426 next_tag++; |
1472 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); | 1427 tag = next_tag; |
1473 | 1428 next_tag++; |
1474 /* Regset is not empty, so add a tag here. */ | 1429 } |
1475 if (regset[0] >= 0) | 1430 |
1476 { | 1431 break; |
1477 if (!first_pass) | 1432 } |
1478 { | 1433 } |
1479 int i; | 1434 |
1480 status = tre_add_tag_left(mem, node, tag); | 1435 if (node->submatch_id >= 0) { |
1481 tnfa->tag_directions[tag] = direction; | 1436 int i; |
1482 if (minimal_tag >= 0) | 1437 /* Push this submatch on the parents stack. */ |
1483 { | 1438 for (i = 0; parents[i] >= 0; i++) |
1484 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); | 1439 ; |
1485 tnfa->minimal_tags[i] = tag; | 1440 parents[i] = node->submatch_id; |
1486 tnfa->minimal_tags[i + 1] = minimal_tag; | 1441 parents[i + 1] = -1; |
1487 tnfa->minimal_tags[i + 2] = -1; | 1442 } |
1488 minimal_tag = -1; | 1443 |
1489 num_minimals++; | 1444 break; /* end case: ADDTAGS_RECURSE */ |
1490 } | 1445 |
1491 tre_purge_regset(regset, tnfa, tag); | 1446 case ADDTAGS_AFTER_ITERATION: { |
1492 } | 1447 int minimal = 0; |
1493 | 1448 int enter_tag; |
1494 regset[0] = -1; | 1449 node = tre_stack_pop_voidptr(stack); |
1495 tag = next_tag; | 1450 if (first_pass) { |
1496 num_tags++; | 1451 node->num_tags = ((tre_iteration_t*)node->obj)->arg->num_tags + |
1497 next_tag++; | 1452 tre_stack_pop_int(stack); |
1498 } | 1453 minimal_tag = -1; |
1499 | 1454 } else { |
1500 if (node->num_submatches > 0) | 1455 minimal = tre_stack_pop_int(stack); |
1501 { | 1456 enter_tag = tre_stack_pop_int(stack); |
1502 /* The next two tags are reserved for markers. */ | 1457 if (minimal) |
1503 next_tag++; | 1458 minimal_tag = enter_tag; |
1504 tag = next_tag; | 1459 } |
1505 next_tag++; | 1460 |
1506 } | 1461 if (!first_pass) { |
1507 | 1462 if (minimal) |
1508 break; | 1463 direction = TRE_TAG_MINIMIZE; |
1509 } | 1464 else |
1510 } | 1465 direction = TRE_TAG_MAXIMIZE; |
1511 | 1466 } |
1512 if (node->submatch_id >= 0) | 1467 break; |
1513 { | 1468 } |
1514 int i; | 1469 |
1515 /* Push this submatch on the parents stack. */ | 1470 case ADDTAGS_AFTER_CAT_LEFT: { |
1516 for (i = 0; parents[i] >= 0; i++); | 1471 int new_tag = tre_stack_pop_int(stack); |
1517 parents[i] = node->submatch_id; | 1472 next_tag = tre_stack_pop_int(stack); |
1518 parents[i + 1] = -1; | 1473 if (new_tag >= 0) { |
1519 } | 1474 tag = new_tag; |
1520 | 1475 } |
1521 break; /* end case: ADDTAGS_RECURSE */ | 1476 break; |
1522 | 1477 } |
1523 case ADDTAGS_AFTER_ITERATION: | 1478 |
1524 { | 1479 case ADDTAGS_AFTER_CAT_RIGHT: |
1525 int minimal = 0; | 1480 node = tre_stack_pop_voidptr(stack); |
1526 int enter_tag; | 1481 if (first_pass) |
1527 node = tre_stack_pop_voidptr(stack); | 1482 node->num_tags = ((tre_catenation_t*)node->obj)->left->num_tags + |
1528 if (first_pass) | 1483 ((tre_catenation_t*)node->obj)->right->num_tags; |
1529 { | 1484 break; |
1530 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags | 1485 |
1531 + tre_stack_pop_int(stack); | 1486 case ADDTAGS_AFTER_UNION_LEFT: |
1532 minimal_tag = -1; | 1487 /* Lift the bottom of the `regset' array so that when processing |
1533 } | 1488 the right operand the items currently in the array are |
1534 else | 1489 invisible. The original bottom was saved at ADDTAGS_UNION and |
1535 { | 1490 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */ |
1536 minimal = tre_stack_pop_int(stack); | 1491 while (*regset >= 0) |
1537 enter_tag = tre_stack_pop_int(stack); | 1492 regset++; |
1538 if (minimal) | 1493 break; |
1539 minimal_tag = enter_tag; | 1494 |
1540 } | 1495 case ADDTAGS_AFTER_UNION_RIGHT: { |
1541 | 1496 int added_tags, tag_left, tag_right; |
1542 if (!first_pass) | 1497 tre_ast_node_t* left = tre_stack_pop_voidptr(stack); |
1543 { | 1498 tre_ast_node_t* right = tre_stack_pop_voidptr(stack); |
1544 if (minimal) | 1499 node = tre_stack_pop_voidptr(stack); |
1545 direction = TRE_TAG_MINIMIZE; | 1500 added_tags = tre_stack_pop_int(stack); |
1546 else | 1501 if (first_pass) { |
1547 direction = TRE_TAG_MAXIMIZE; | 1502 node->num_tags = ((tre_union_t*)node->obj)->left->num_tags + |
1548 } | 1503 ((tre_union_t*)node->obj)->right->num_tags + |
1549 break; | 1504 added_tags + ((node->num_submatches > 0) ? 2 : 0); |
1550 } | 1505 } |
1551 | 1506 regset = tre_stack_pop_voidptr(stack); |
1552 case ADDTAGS_AFTER_CAT_LEFT: | 1507 tag_left = tre_stack_pop_int(stack); |
1553 { | 1508 tag_right = tre_stack_pop_int(stack); |
1554 int new_tag = tre_stack_pop_int(stack); | 1509 |
1555 next_tag = tre_stack_pop_int(stack); | 1510 /* Add tags after both children, the left child gets a smaller |
1556 if (new_tag >= 0) | 1511 tag than the right child. This guarantees that we prefer |
1557 { | 1512 the left child over the right child. */ |
1558 tag = new_tag; | 1513 /* XXX - This is not always necessary (if the children have |
1559 } | 1514 tags which must be seen for every match of that child). */ |
1560 break; | 1515 /* XXX - Check if this is the only place where tre_add_tag_right |
1561 } | 1516 is used. If so, use tre_add_tag_left (putting the tag before |
1562 | 1517 the child as opposed after the child) and throw away |
1563 case ADDTAGS_AFTER_CAT_RIGHT: | 1518 tre_add_tag_right. */ |
1564 node = tre_stack_pop_voidptr(stack); | 1519 if (node->num_submatches > 0) { |
1565 if (first_pass) | 1520 if (!first_pass) { |
1566 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags | 1521 status = tre_add_tag_right(mem, left, tag_left); |
1567 + ((tre_catenation_t *)node->obj)->right->num_tags; | 1522 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; |
1568 break; | 1523 if (status == REG_OK) |
1569 | 1524 status = tre_add_tag_right(mem, right, tag_right); |
1570 case ADDTAGS_AFTER_UNION_LEFT: | 1525 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; |
1571 /* Lift the bottom of the `regset' array so that when processing | 1526 } |
1572 the right operand the items currently in the array are | 1527 num_tags += 2; |
1573 invisible. The original bottom was saved at ADDTAGS_UNION and | 1528 } |
1574 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */ | 1529 direction = TRE_TAG_MAXIMIZE; |
1575 while (*regset >= 0) | 1530 break; |
1576 regset++; | 1531 } |
1577 break; | 1532 |
1578 | 1533 default: |
1579 case ADDTAGS_AFTER_UNION_RIGHT: | 1534 assert(0); |
1580 { | 1535 break; |
1581 int added_tags, tag_left, tag_right; | 1536 |
1582 tre_ast_node_t *left = tre_stack_pop_voidptr(stack); | 1537 } /* end switch(symbol) */ |
1583 tre_ast_node_t *right = tre_stack_pop_voidptr(stack); | 1538 } /* end while(tre_stack_num_objects(stack) > bottom) */ |
1584 node = tre_stack_pop_voidptr(stack); | |
1585 added_tags = tre_stack_pop_int(stack); | |
1586 if (first_pass) | |
1587 { | |
1588 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags | |
1589 + ((tre_union_t *)node->obj)->right->num_tags + added_tags | |
1590 + ((node->num_submatches > 0) ? 2 : 0); | |
1591 } | |
1592 regset = tre_stack_pop_voidptr(stack); | |
1593 tag_left = tre_stack_pop_int(stack); | |
1594 tag_right = tre_stack_pop_int(stack); | |
1595 | |
1596 /* Add tags after both children, the left child gets a smaller | |
1597 tag than the right child. This guarantees that we prefer | |
1598 the left child over the right child. */ | |
1599 /* XXX - This is not always necessary (if the children have | |
1600 tags which must be seen for every match of that child). */ | |
1601 /* XXX - Check if this is the only place where tre_add_tag_right | |
1602 is used. If so, use tre_add_tag_left (putting the tag before | |
1603 the child as opposed after the child) and throw away | |
1604 tre_add_tag_right. */ | |
1605 if (node->num_submatches > 0) | |
1606 { | |
1607 if (!first_pass) | |
1608 { | |
1609 status = tre_add_tag_right(mem, left, tag_left); | |
1610 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; | |
1611 if (status == REG_OK) | |
1612 status = tre_add_tag_right(mem, right, tag_right); | |
1613 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; | |
1614 } | |
1615 num_tags += 2; | |
1616 } | |
1617 direction = TRE_TAG_MAXIMIZE; | |
1618 break; | |
1619 } | |
1620 | |
1621 default: | |
1622 assert(0); | |
1623 break; | |
1624 | |
1625 } /* end switch(symbol) */ | |
1626 } /* end while(tre_stack_num_objects(stack) > bottom) */ | |
1627 | 1539 |
1628 if (!first_pass) | 1540 if (!first_pass) |
1629 tre_purge_regset(regset, tnfa, tag); | 1541 tre_purge_regset(regset, tnfa, tag); |
1630 | 1542 |
1631 if (!first_pass && minimal_tag >= 0) | 1543 if (!first_pass && minimal_tag >= 0) { |
1632 { | 1544 int i; |
1633 int i; | 1545 for (i = 0; tnfa->minimal_tags[i] >= 0; i++) |
1634 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); | 1546 ; |
1635 tnfa->minimal_tags[i] = tag; | 1547 tnfa->minimal_tags[i] = tag; |
1636 tnfa->minimal_tags[i + 1] = minimal_tag; | 1548 tnfa->minimal_tags[i + 1] = minimal_tag; |
1637 tnfa->minimal_tags[i + 2] = -1; | 1549 tnfa->minimal_tags[i + 2] = -1; |
1638 minimal_tag = -1; | 1550 minimal_tag = -1; |
1639 num_minimals++; | 1551 num_minimals++; |
1640 } | 1552 } |
1641 | 1553 |
1642 assert(tree->num_tags == num_tags); | 1554 assert(tree->num_tags == num_tags); |
1643 tnfa->end_tag = num_tags; | 1555 tnfa->end_tag = num_tags; |
1644 tnfa->num_tags = num_tags; | 1556 tnfa->num_tags = num_tags; |
1645 tnfa->num_minimals = num_minimals; | 1557 tnfa->num_minimals = num_minimals; |
1646 xfree(orig_regset); | 1558 xfree(orig_regset); |
1647 xfree(parents); | 1559 xfree(parents); |
1648 xfree(saved_states); | 1560 xfree(saved_states); |
1649 return status; | 1561 return status; |
1650 } | 1562 } |
1651 | 1563 |
1652 | |
1653 | |
1654 /* | 1564 /* |
1655 AST to TNFA compilation routines. | 1565 AST to TNFA compilation routines. |
1656 */ | 1566 */ |
1657 | 1567 |
1658 typedef enum { | 1568 typedef enum { COPY_RECURSE, COPY_SET_RESULT_PTR } tre_copyast_symbol_t; |
1659 COPY_RECURSE, | |
1660 COPY_SET_RESULT_PTR | |
1661 } tre_copyast_symbol_t; | |
1662 | 1569 |
1663 /* Flags for tre_copy_ast(). */ | 1570 /* Flags for tre_copy_ast(). */ |
1664 #define COPY_REMOVE_TAGS» 1 | 1571 #define COPY_REMOVE_TAGS 1 |
1665 #define COPY_MAXIMIZE_FIRST_TAG» 2 | 1572 #define COPY_MAXIMIZE_FIRST_TAG 2 |
1666 | 1573 |
1667 static reg_errcode_t | 1574 static reg_errcode_t tre_copy_ast(tre_mem_t mem, |
1668 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, | 1575 tre_stack_t* stack, |
1669 » int flags, int *pos_add, tre_tag_direction_t *tag_directions, | 1576 tre_ast_node_t* ast, |
1670 » tre_ast_node_t **copy, int *max_pos) | 1577 int flags, |
1671 { | 1578 int* pos_add, |
| 1579 tre_tag_direction_t* tag_directions, |
| 1580 tre_ast_node_t** copy, |
| 1581 int* max_pos) { |
1672 reg_errcode_t status = REG_OK; | 1582 reg_errcode_t status = REG_OK; |
1673 int bottom = tre_stack_num_objects(stack); | 1583 int bottom = tre_stack_num_objects(stack); |
1674 int num_copied = 0; | 1584 int num_copied = 0; |
1675 int first_tag = 1; | 1585 int first_tag = 1; |
1676 tre_ast_node_t **result = copy; | 1586 tre_ast_node_t** result = copy; |
1677 tre_copyast_symbol_t symbol; | 1587 tre_copyast_symbol_t symbol; |
1678 | 1588 |
1679 STACK_PUSH(stack, voidptr, ast); | 1589 STACK_PUSH(stack, voidptr, ast); |
1680 STACK_PUSH(stack, int, COPY_RECURSE); | 1590 STACK_PUSH(stack, int, COPY_RECURSE); |
1681 | 1591 |
1682 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) | 1592 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) { |
1683 { | 1593 tre_ast_node_t* node; |
1684 tre_ast_node_t *node; | 1594 if (status != REG_OK) |
1685 if (status != REG_OK) | 1595 break; |
1686 » break; | |
1687 | 1596 |
1688 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack); | 1597 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack); |
1689 switch (symbol) | 1598 switch (symbol) { |
1690 » { | 1599 case COPY_SET_RESULT_PTR: |
1691 » case COPY_SET_RESULT_PTR: | 1600 result = tre_stack_pop_voidptr(stack); |
1692 » result = tre_stack_pop_voidptr(stack); | 1601 break; |
1693 » break; | 1602 case COPY_RECURSE: |
1694 » case COPY_RECURSE: | 1603 node = tre_stack_pop_voidptr(stack); |
1695 » node = tre_stack_pop_voidptr(stack); | 1604 switch (node->type) { |
1696 » switch (node->type) | 1605 case LITERAL: { |
1697 » { | 1606 tre_literal_t* lit = node->obj; |
1698 » case LITERAL: | 1607 int pos = lit->position; |
1699 » { | 1608 int min = lit->code_min; |
1700 » » tre_literal_t *lit = node->obj; | 1609 int max = lit->code_max; |
1701 » » int pos = lit->position; | 1610 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) { |
1702 » » int min = lit->code_min; | 1611 /* XXX - e.g. [ab] has only one position but two |
1703 » » int max = lit->code_max; | 1612 nodes, so we are creating holes in the state space |
1704 » » if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) | 1613 here. Not fatal, just wastes memory. */ |
1705 » » { | 1614 pos += *pos_add; |
1706 » » /* XXX - e.g. [ab] has only one position but two | 1615 num_copied++; |
1707 » » nodes, so we are creating holes in the state space | 1616 } else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) { |
1708 » » here. Not fatal, just wastes memory. */ | 1617 /* Change this tag to empty. */ |
1709 » » pos += *pos_add; | 1618 min = EMPTY; |
1710 » » num_copied++; | 1619 max = pos = -1; |
1711 » » } | 1620 } else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) && |
1712 » » else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) | 1621 first_tag) { |
1713 » » { | 1622 /* Maximize the first tag. */ |
1714 » » /* Change this tag to empty. */ | 1623 tag_directions[max] = TRE_TAG_MAXIMIZE; |
1715 » » min = EMPTY; | 1624 first_tag = 0; |
1716 » » max = pos = -1; | 1625 } |
1717 » » } | 1626 *result = tre_ast_new_literal(mem, min, max, pos); |
1718 » » else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) | 1627 if (*result == NULL) |
1719 » » » && first_tag) | 1628 status = REG_ESPACE; |
1720 » » { | 1629 else { |
1721 » » /* Maximize the first tag. */ | 1630 tre_literal_t* p = (*result)->obj; |
1722 » » tag_directions[max] = TRE_TAG_MAXIMIZE; | 1631 p->class = lit->class; |
1723 » » first_tag = 0; | 1632 p->neg_classes = lit->neg_classes; |
1724 » » } | 1633 } |
1725 » » *result = tre_ast_new_literal(mem, min, max, pos); | |
1726 » » if (*result == NULL) | |
1727 » » status = REG_ESPACE; | |
1728 » » else { | |
1729 » » tre_literal_t *p = (*result)->obj; | |
1730 » » p->class = lit->class; | |
1731 » » p->neg_classes = lit->neg_classes; | |
1732 » » } | |
1733 | 1634 |
1734 » » if (pos > *max_pos) | 1635 if (pos > *max_pos) |
1735 » » *max_pos = pos; | 1636 *max_pos = pos; |
1736 » » break; | 1637 break; |
1737 » } | 1638 } |
1738 » case UNION: | 1639 case UNION: { |
1739 » { | 1640 tre_union_t* uni = node->obj; |
1740 » » tre_union_t *uni = node->obj; | 1641 tre_union_t* tmp; |
1741 » » tre_union_t *tmp; | 1642 *result = tre_ast_new_union(mem, uni->left, uni->right); |
1742 » » *result = tre_ast_new_union(mem, uni->left, uni->right); | 1643 if (*result == NULL) { |
1743 » » if (*result == NULL) | 1644 status = REG_ESPACE; |
1744 » » { | 1645 break; |
1745 » » status = REG_ESPACE; | 1646 } |
1746 » » break; | 1647 tmp = (*result)->obj; |
1747 » » } | 1648 result = &tmp->left; |
1748 » » tmp = (*result)->obj; | 1649 STACK_PUSHX(stack, voidptr, uni->right); |
1749 » » result = &tmp->left; | 1650 STACK_PUSHX(stack, int, COPY_RECURSE); |
1750 » » STACK_PUSHX(stack, voidptr, uni->right); | 1651 STACK_PUSHX(stack, voidptr, &tmp->right); |
1751 » » STACK_PUSHX(stack, int, COPY_RECURSE); | 1652 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); |
1752 » » STACK_PUSHX(stack, voidptr, &tmp->right); | 1653 STACK_PUSHX(stack, voidptr, uni->left); |
1753 » » STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); | 1654 STACK_PUSHX(stack, int, COPY_RECURSE); |
1754 » » STACK_PUSHX(stack, voidptr, uni->left); | 1655 break; |
1755 » » STACK_PUSHX(stack, int, COPY_RECURSE); | 1656 } |
1756 » » break; | 1657 case CATENATION: { |
1757 » } | 1658 tre_catenation_t* cat = node->obj; |
1758 » case CATENATION: | 1659 tre_catenation_t* tmp; |
1759 » { | 1660 *result = tre_ast_new_catenation(mem, cat->left, cat->right); |
1760 » » tre_catenation_t *cat = node->obj; | 1661 if (*result == NULL) { |
1761 » » tre_catenation_t *tmp; | 1662 status = REG_ESPACE; |
1762 » » *result = tre_ast_new_catenation(mem, cat->left, cat->right); | 1663 break; |
1763 » » if (*result == NULL) | 1664 } |
1764 » » { | 1665 tmp = (*result)->obj; |
1765 » » status = REG_ESPACE; | 1666 tmp->left = NULL; |
1766 » » break; | 1667 tmp->right = NULL; |
1767 » » } | 1668 result = &tmp->left; |
1768 » » tmp = (*result)->obj; | |
1769 » » tmp->left = NULL; | |
1770 » » tmp->right = NULL; | |
1771 » » result = &tmp->left; | |
1772 | 1669 |
1773 » » STACK_PUSHX(stack, voidptr, cat->right); | 1670 STACK_PUSHX(stack, voidptr, cat->right); |
1774 » » STACK_PUSHX(stack, int, COPY_RECURSE); | 1671 STACK_PUSHX(stack, int, COPY_RECURSE); |
1775 » » STACK_PUSHX(stack, voidptr, &tmp->right); | 1672 STACK_PUSHX(stack, voidptr, &tmp->right); |
1776 » » STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); | 1673 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); |
1777 » » STACK_PUSHX(stack, voidptr, cat->left); | 1674 STACK_PUSHX(stack, voidptr, cat->left); |
1778 » » STACK_PUSHX(stack, int, COPY_RECURSE); | 1675 STACK_PUSHX(stack, int, COPY_RECURSE); |
1779 » » break; | 1676 break; |
1780 » } | 1677 } |
1781 » case ITERATION: | 1678 case ITERATION: { |
1782 » { | 1679 tre_iteration_t* iter = node->obj; |
1783 » » tre_iteration_t *iter = node->obj; | 1680 STACK_PUSHX(stack, voidptr, iter->arg); |
1784 » » STACK_PUSHX(stack, voidptr, iter->arg); | 1681 STACK_PUSHX(stack, int, COPY_RECURSE); |
1785 » » STACK_PUSHX(stack, int, COPY_RECURSE); | 1682 *result = tre_ast_new_iter(mem, iter->arg, iter->min, iter->max, |
1786 » » *result = tre_ast_new_iter(mem, iter->arg, iter->min, | 1683 iter->minimal); |
1787 » » » » » iter->max, iter->minimal); | 1684 if (*result == NULL) { |
1788 » » if (*result == NULL) | 1685 status = REG_ESPACE; |
1789 » » { | 1686 break; |
1790 » » status = REG_ESPACE; | 1687 } |
1791 » » break; | 1688 iter = (*result)->obj; |
1792 » » } | 1689 result = &iter->arg; |
1793 » » iter = (*result)->obj; | 1690 break; |
1794 » » result = &iter->arg; | 1691 } |
1795 » » break; | 1692 default: |
1796 » } | 1693 assert(0); |
1797 » default: | 1694 break; |
1798 » assert(0); | 1695 } |
1799 » break; | 1696 break; |
1800 » } | |
1801 » break; | |
1802 » } | |
1803 } | 1697 } |
| 1698 } |
1804 *pos_add += num_copied; | 1699 *pos_add += num_copied; |
1805 return status; | 1700 return status; |
1806 } | 1701 } |
1807 | 1702 |
1808 typedef enum { | 1703 typedef enum { EXPAND_RECURSE, EXPAND_AFTER_ITER } tre_expand_ast_symbol_t; |
1809 EXPAND_RECURSE, | |
1810 EXPAND_AFTER_ITER | |
1811 } tre_expand_ast_symbol_t; | |
1812 | 1704 |
1813 /* Expands each iteration node that has a finite nonzero minimum or maximum | 1705 /* Expands each iteration node that has a finite nonzero minimum or maximum |
1814 iteration count to a catenated sequence of copies of the node. */ | 1706 iteration count to a catenated sequence of copies of the node. */ |
1815 static reg_errcode_t | 1707 static reg_errcode_t tre_expand_ast(tre_mem_t mem, |
1816 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, | 1708 tre_stack_t* stack, |
1817 » int *position, tre_tag_direction_t *tag_directions) | 1709 tre_ast_node_t* ast, |
1818 { | 1710 int* position, |
| 1711 tre_tag_direction_t* tag_directions) { |
1819 reg_errcode_t status = REG_OK; | 1712 reg_errcode_t status = REG_OK; |
1820 int bottom = tre_stack_num_objects(stack); | 1713 int bottom = tre_stack_num_objects(stack); |
1821 int pos_add = 0; | 1714 int pos_add = 0; |
1822 int pos_add_total = 0; | 1715 int pos_add_total = 0; |
1823 int max_pos = 0; | 1716 int max_pos = 0; |
1824 int iter_depth = 0; | 1717 int iter_depth = 0; |
1825 | 1718 |
1826 STACK_PUSHR(stack, voidptr, ast); | 1719 STACK_PUSHR(stack, voidptr, ast); |
1827 STACK_PUSHR(stack, int, EXPAND_RECURSE); | 1720 STACK_PUSHR(stack, int, EXPAND_RECURSE); |
1828 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) | 1721 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) { |
1829 { | 1722 tre_ast_node_t* node; |
1830 tre_ast_node_t *node; | 1723 tre_expand_ast_symbol_t symbol; |
1831 tre_expand_ast_symbol_t symbol; | |
1832 | 1724 |
1833 if (status != REG_OK) | 1725 if (status != REG_OK) |
1834 » break; | 1726 break; |
1835 | 1727 |
1836 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack); | 1728 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack); |
1837 node = tre_stack_pop_voidptr(stack); | 1729 node = tre_stack_pop_voidptr(stack); |
1838 switch (symbol) | 1730 switch (symbol) { |
1839 » { | 1731 case EXPAND_RECURSE: |
1840 » case EXPAND_RECURSE: | 1732 switch (node->type) { |
1841 » switch (node->type) | 1733 case LITERAL: { |
1842 » { | 1734 tre_literal_t* lit = node->obj; |
1843 » case LITERAL: | 1735 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) { |
1844 » { | 1736 lit->position += pos_add; |
1845 » » tre_literal_t *lit= node->obj; | 1737 if (lit->position > max_pos) |
1846 » » if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) | 1738 max_pos = lit->position; |
1847 » » { | 1739 } |
1848 » » lit->position += pos_add; | 1740 break; |
1849 » » if (lit->position > max_pos) | 1741 } |
1850 » » max_pos = lit->position; | 1742 case UNION: { |
1851 » » } | 1743 tre_union_t* uni = node->obj; |
1852 » » break; | 1744 STACK_PUSHX(stack, voidptr, uni->right); |
1853 » } | 1745 STACK_PUSHX(stack, int, EXPAND_RECURSE); |
1854 » case UNION: | 1746 STACK_PUSHX(stack, voidptr, uni->left); |
1855 » { | 1747 STACK_PUSHX(stack, int, EXPAND_RECURSE); |
1856 » » tre_union_t *uni = node->obj; | 1748 break; |
1857 » » STACK_PUSHX(stack, voidptr, uni->right); | 1749 } |
1858 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); | 1750 case CATENATION: { |
1859 » » STACK_PUSHX(stack, voidptr, uni->left); | 1751 tre_catenation_t* cat = node->obj; |
1860 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); | 1752 STACK_PUSHX(stack, voidptr, cat->right); |
1861 » » break; | 1753 STACK_PUSHX(stack, int, EXPAND_RECURSE); |
1862 » } | 1754 STACK_PUSHX(stack, voidptr, cat->left); |
1863 » case CATENATION: | 1755 STACK_PUSHX(stack, int, EXPAND_RECURSE); |
1864 » { | 1756 break; |
1865 » » tre_catenation_t *cat = node->obj; | 1757 } |
1866 » » STACK_PUSHX(stack, voidptr, cat->right); | 1758 case ITERATION: { |
1867 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); | 1759 tre_iteration_t* iter = node->obj; |
1868 » » STACK_PUSHX(stack, voidptr, cat->left); | 1760 STACK_PUSHX(stack, int, pos_add); |
1869 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); | 1761 STACK_PUSHX(stack, voidptr, node); |
1870 » » break; | 1762 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER); |
1871 » } | 1763 STACK_PUSHX(stack, voidptr, iter->arg); |
1872 » case ITERATION: | 1764 STACK_PUSHX(stack, int, EXPAND_RECURSE); |
1873 » { | 1765 /* If we are going to expand this node at EXPAND_AFTER_ITER |
1874 » » tre_iteration_t *iter = node->obj; | 1766 then don't increase the `pos' fields of the nodes now, it |
1875 » » STACK_PUSHX(stack, int, pos_add); | 1767 will get done when expanding. */ |
1876 » » STACK_PUSHX(stack, voidptr, node); | 1768 if (iter->min > 1 || iter->max > 1) |
1877 » » STACK_PUSHX(stack, int, EXPAND_AFTER_ITER); | 1769 pos_add = 0; |
1878 » » STACK_PUSHX(stack, voidptr, iter->arg); | 1770 iter_depth++; |
1879 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); | 1771 break; |
1880 » » /* If we are going to expand this node at EXPAND_AFTER_ITER | 1772 } |
1881 » » then don't increase the `pos' fields of the nodes now, it | 1773 default: |
1882 » » will get done when expanding. */ | 1774 assert(0); |
1883 » » if (iter->min > 1 || iter->max > 1) | 1775 break; |
1884 » » pos_add = 0; | 1776 } |
1885 » » iter_depth++; | 1777 break; |
1886 » » break; | 1778 case EXPAND_AFTER_ITER: { |
1887 » } | 1779 tre_iteration_t* iter = node->obj; |
1888 » default: | 1780 int pos_add_last; |
1889 » assert(0); | 1781 pos_add = tre_stack_pop_int(stack); |
1890 » break; | 1782 pos_add_last = pos_add; |
1891 » } | 1783 if (iter->min > 1 || iter->max > 1) { |
1892 » break; | 1784 tre_ast_node_t *seq1 = NULL, *seq2 = NULL; |
1893 » case EXPAND_AFTER_ITER: | 1785 int j; |
1894 » { | 1786 int pos_add_save = pos_add; |
1895 » tre_iteration_t *iter = node->obj; | |
1896 » int pos_add_last; | |
1897 » pos_add = tre_stack_pop_int(stack); | |
1898 » pos_add_last = pos_add; | |
1899 » if (iter->min > 1 || iter->max > 1) | |
1900 » { | |
1901 » » tre_ast_node_t *seq1 = NULL, *seq2 = NULL; | |
1902 » » int j; | |
1903 » » int pos_add_save = pos_add; | |
1904 | 1787 |
1905 » » /* Create a catenated sequence of copies of the node. */ | 1788 /* Create a catenated sequence of copies of the node. */ |
1906 » » for (j = 0; j < iter->min; j++) | 1789 for (j = 0; j < iter->min; j++) { |
1907 » » { | 1790 tre_ast_node_t* copy; |
1908 » » tre_ast_node_t *copy; | 1791 /* Remove tags from all but the last copy. */ |
1909 » » /* Remove tags from all but the last copy. */ | 1792 int flags = ((j + 1 < iter->min) ? COPY_REMOVE_TAGS |
1910 » » int flags = ((j + 1 < iter->min) | 1793 : COPY_MAXIMIZE_FIRST_TAG); |
1911 » » » » ? COPY_REMOVE_TAGS | 1794 pos_add_save = pos_add; |
1912 » » » » : COPY_MAXIMIZE_FIRST_TAG); | 1795 status = tre_copy_ast(mem, stack, iter->arg, flags, &pos_add, |
1913 » » pos_add_save = pos_add; | 1796 tag_directions, ©, &max_pos); |
1914 » » status = tre_copy_ast(mem, stack, iter->arg, flags, | 1797 if (status != REG_OK) |
1915 » » » » » &pos_add, tag_directions, ©, | 1798 return status; |
1916 » » » » » &max_pos); | 1799 if (seq1 != NULL) |
1917 » » if (status != REG_OK) | 1800 seq1 = tre_ast_new_catenation(mem, seq1, copy); |
1918 » » return status; | 1801 else |
1919 » » if (seq1 != NULL) | 1802 seq1 = copy; |
1920 » » seq1 = tre_ast_new_catenation(mem, seq1, copy); | 1803 if (seq1 == NULL) |
1921 » » else | 1804 return REG_ESPACE; |
1922 » » seq1 = copy; | 1805 } |
1923 » » if (seq1 == NULL) | |
1924 » » return REG_ESPACE; | |
1925 » » } | |
1926 | 1806 |
1927 » » if (iter->max == -1) | 1807 if (iter->max == -1) { |
1928 » » { | 1808 /* No upper limit. */ |
1929 » » /* No upper limit. */ | 1809 pos_add_save = pos_add; |
1930 » » pos_add_save = pos_add; | 1810 status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL, |
1931 » » status = tre_copy_ast(mem, stack, iter->arg, 0, | 1811 &seq2, &max_pos); |
1932 » » » » » &pos_add, NULL, &seq2, &max_pos); | 1812 if (status != REG_OK) |
1933 » » if (status != REG_OK) | 1813 return status; |
1934 » » return status; | 1814 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); |
1935 » » seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); | 1815 if (seq2 == NULL) |
1936 » » if (seq2 == NULL) | 1816 return REG_ESPACE; |
1937 » » return REG_ESPACE; | 1817 } else { |
1938 » » } | 1818 for (j = iter->min; j < iter->max; j++) { |
1939 » » else | 1819 tre_ast_node_t *tmp, *copy; |
1940 » » { | 1820 pos_add_save = pos_add; |
1941 » » for (j = iter->min; j < iter->max; j++) | 1821 status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL, |
1942 » » { | 1822 ©, &max_pos); |
1943 » » » tre_ast_node_t *tmp, *copy; | 1823 if (status != REG_OK) |
1944 » » » pos_add_save = pos_add; | 1824 return status; |
1945 » » » status = tre_copy_ast(mem, stack, iter->arg, 0, | 1825 if (seq2 != NULL) |
1946 » » » » » &pos_add, NULL, ©, &max_pos); | 1826 seq2 = tre_ast_new_catenation(mem, copy, seq2); |
1947 » » » if (status != REG_OK) | 1827 else |
1948 » » » return status; | 1828 seq2 = copy; |
1949 » » » if (seq2 != NULL) | 1829 if (seq2 == NULL) |
1950 » » » seq2 = tre_ast_new_catenation(mem, copy, seq2); | 1830 return REG_ESPACE; |
1951 » » » else | 1831 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1); |
1952 » » » seq2 = copy; | 1832 if (tmp == NULL) |
1953 » » » if (seq2 == NULL) | 1833 return REG_ESPACE; |
1954 » » » return REG_ESPACE; | 1834 seq2 = tre_ast_new_union(mem, tmp, seq2); |
1955 » » » tmp = tre_ast_new_literal(mem, EMPTY, -1, -1); | 1835 if (seq2 == NULL) |
1956 » » » if (tmp == NULL) | 1836 return REG_ESPACE; |
1957 » » » return REG_ESPACE; | 1837 } |
1958 » » » seq2 = tre_ast_new_union(mem, tmp, seq2); | 1838 } |
1959 » » » if (seq2 == NULL) | |
1960 » » » return REG_ESPACE; | |
1961 » » } | |
1962 » » } | |
1963 | 1839 |
1964 » » pos_add = pos_add_save; | 1840 pos_add = pos_add_save; |
1965 » » if (seq1 == NULL) | 1841 if (seq1 == NULL) |
1966 » » seq1 = seq2; | 1842 seq1 = seq2; |
1967 » » else if (seq2 != NULL) | 1843 else if (seq2 != NULL) |
1968 » » seq1 = tre_ast_new_catenation(mem, seq1, seq2); | 1844 seq1 = tre_ast_new_catenation(mem, seq1, seq2); |
1969 » » if (seq1 == NULL) | 1845 if (seq1 == NULL) |
1970 » » return REG_ESPACE; | 1846 return REG_ESPACE; |
1971 » » node->obj = seq1->obj; | 1847 node->obj = seq1->obj; |
1972 » » node->type = seq1->type; | 1848 node->type = seq1->type; |
1973 » } | 1849 } |
1974 | 1850 |
1975 » iter_depth--; | 1851 iter_depth--; |
1976 » pos_add_total += pos_add - pos_add_last; | 1852 pos_add_total += pos_add - pos_add_last; |
1977 » if (iter_depth == 0) | 1853 if (iter_depth == 0) |
1978 » pos_add = pos_add_total; | 1854 pos_add = pos_add_total; |
1979 | 1855 |
1980 » break; | 1856 break; |
1981 » } | 1857 } |
1982 » default: | 1858 default: |
1983 » assert(0); | 1859 assert(0); |
1984 » break; | 1860 break; |
1985 » } | |
1986 } | 1861 } |
| 1862 } |
1987 | 1863 |
1988 *position += pos_add_total; | 1864 *position += pos_add_total; |
1989 | 1865 |
1990 /* `max_pos' should never be larger than `*position' if the above | 1866 /* `max_pos' should never be larger than `*position' if the above |
1991 code works, but just an extra safeguard let's make sure | 1867 code works, but just an extra safeguard let's make sure |
1992 `*position' is set large enough so enough memory will be | 1868 `*position' is set large enough so enough memory will be |
1993 allocated for the transition table. */ | 1869 allocated for the transition table. */ |
1994 if (max_pos > *position) | 1870 if (max_pos > *position) |
1995 *position = max_pos; | 1871 *position = max_pos; |
1996 | 1872 |
1997 return status; | 1873 return status; |
1998 } | 1874 } |
1999 | 1875 |
2000 static tre_pos_and_tags_t * | 1876 static tre_pos_and_tags_t* tre_set_empty(tre_mem_t mem) { |
2001 tre_set_empty(tre_mem_t mem) | 1877 tre_pos_and_tags_t* new_set; |
2002 { | |
2003 tre_pos_and_tags_t *new_set; | |
2004 | 1878 |
2005 new_set = tre_mem_calloc(mem, sizeof(*new_set)); | 1879 new_set = tre_mem_calloc(mem, sizeof(*new_set)); |
2006 if (new_set == NULL) | 1880 if (new_set == NULL) |
2007 return NULL; | 1881 return NULL; |
2008 | 1882 |
2009 new_set[0].position = -1; | 1883 new_set[0].position = -1; |
2010 new_set[0].code_min = -1; | 1884 new_set[0].code_min = -1; |
2011 new_set[0].code_max = -1; | 1885 new_set[0].code_max = -1; |
2012 | 1886 |
2013 return new_set; | 1887 return new_set; |
2014 } | 1888 } |
2015 | 1889 |
2016 static tre_pos_and_tags_t * | 1890 static tre_pos_and_tags_t* tre_set_one(tre_mem_t mem, |
2017 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max, | 1891 int position, |
2018 » tre_ctype_t class, tre_ctype_t *neg_classes, int backref) | 1892 int code_min, |
2019 { | 1893 int code_max, |
2020 tre_pos_and_tags_t *new_set; | 1894 tre_ctype_t class, |
| 1895 tre_ctype_t* neg_classes, |
| 1896 int backref) { |
| 1897 tre_pos_and_tags_t* new_set; |
2021 | 1898 |
2022 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); | 1899 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); |
2023 if (new_set == NULL) | 1900 if (new_set == NULL) |
2024 return NULL; | 1901 return NULL; |
2025 | 1902 |
2026 new_set[0].position = position; | 1903 new_set[0].position = position; |
2027 new_set[0].code_min = code_min; | 1904 new_set[0].code_min = code_min; |
2028 new_set[0].code_max = code_max; | 1905 new_set[0].code_max = code_max; |
2029 new_set[0].class = class; | 1906 new_set[0].class = class; |
2030 new_set[0].neg_classes = neg_classes; | 1907 new_set[0].neg_classes = neg_classes; |
2031 new_set[0].backref = backref; | 1908 new_set[0].backref = backref; |
2032 new_set[1].position = -1; | 1909 new_set[1].position = -1; |
2033 new_set[1].code_min = -1; | 1910 new_set[1].code_min = -1; |
2034 new_set[1].code_max = -1; | 1911 new_set[1].code_max = -1; |
2035 | 1912 |
2036 return new_set; | 1913 return new_set; |
2037 } | 1914 } |
2038 | 1915 |
2039 static tre_pos_and_tags_t * | 1916 static tre_pos_and_tags_t* tre_set_union(tre_mem_t mem, |
2040 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, | 1917 tre_pos_and_tags_t* set1, |
2041 » int *tags, int assertions) | 1918 tre_pos_and_tags_t* set2, |
2042 { | 1919 int* tags, |
| 1920 int assertions) { |
2043 int s1, s2, i, j; | 1921 int s1, s2, i, j; |
2044 tre_pos_and_tags_t *new_set; | 1922 tre_pos_and_tags_t* new_set; |
2045 int *new_tags; | 1923 int* new_tags; |
2046 int num_tags; | 1924 int num_tags; |
2047 | 1925 |
2048 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++); | 1926 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++) |
2049 for (s1 = 0; set1[s1].position >= 0; s1++); | 1927 ; |
2050 for (s2 = 0; set2[s2].position >= 0; s2++); | 1928 for (s1 = 0; set1[s1].position >= 0; s1++) |
| 1929 ; |
| 1930 for (s2 = 0; set2[s2].position >= 0; s2++) |
| 1931 ; |
2051 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); | 1932 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); |
2052 if (!new_set ) | 1933 if (!new_set) |
2053 return NULL; | 1934 return NULL; |
2054 | 1935 |
2055 for (s1 = 0; set1[s1].position >= 0; s1++) | 1936 for (s1 = 0; set1[s1].position >= 0; s1++) { |
2056 { | 1937 new_set[s1].position = set1[s1].position; |
2057 new_set[s1].position = set1[s1].position; | 1938 new_set[s1].code_min = set1[s1].code_min; |
2058 new_set[s1].code_min = set1[s1].code_min; | 1939 new_set[s1].code_max = set1[s1].code_max; |
2059 new_set[s1].code_max = set1[s1].code_max; | 1940 new_set[s1].assertions = set1[s1].assertions | assertions; |
2060 new_set[s1].assertions = set1[s1].assertions | assertions; | 1941 new_set[s1].class = set1[s1].class; |
2061 new_set[s1].class = set1[s1].class; | 1942 new_set[s1].neg_classes = set1[s1].neg_classes; |
2062 new_set[s1].neg_classes = set1[s1].neg_classes; | 1943 new_set[s1].backref = set1[s1].backref; |
2063 new_set[s1].backref = set1[s1].backref; | 1944 if (set1[s1].tags == NULL && tags == NULL) |
2064 if (set1[s1].tags == NULL && tags == NULL) | 1945 new_set[s1].tags = NULL; |
2065 » new_set[s1].tags = NULL; | 1946 else { |
2066 else | 1947 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++) |
2067 » { | 1948 ; |
2068 » for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++); | 1949 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) * (i + num_tags + 1))); |
2069 » new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) | 1950 if (new_tags == NULL) |
2070 » » » » » * (i + num_tags + 1))); | 1951 return NULL; |
2071 » if (new_tags == NULL) | 1952 for (j = 0; j < i; j++) |
2072 » return NULL; | 1953 new_tags[j] = set1[s1].tags[j]; |
2073 » for (j = 0; j < i; j++) | 1954 for (i = 0; i < num_tags; i++) |
2074 » new_tags[j] = set1[s1].tags[j]; | 1955 new_tags[j + i] = tags[i]; |
2075 » for (i = 0; i < num_tags; i++) | 1956 new_tags[j + i] = -1; |
2076 » new_tags[j + i] = tags[i]; | 1957 new_set[s1].tags = new_tags; |
2077 » new_tags[j + i] = -1; | |
2078 » new_set[s1].tags = new_tags; | |
2079 » } | |
2080 } | 1958 } |
| 1959 } |
2081 | 1960 |
2082 for (s2 = 0; set2[s2].position >= 0; s2++) | 1961 for (s2 = 0; set2[s2].position >= 0; s2++) { |
2083 { | 1962 new_set[s1 + s2].position = set2[s2].position; |
2084 new_set[s1 + s2].position = set2[s2].position; | 1963 new_set[s1 + s2].code_min = set2[s2].code_min; |
2085 new_set[s1 + s2].code_min = set2[s2].code_min; | 1964 new_set[s1 + s2].code_max = set2[s2].code_max; |
2086 new_set[s1 + s2].code_max = set2[s2].code_max; | 1965 /* XXX - why not | assertions here as well? */ |
2087 /* XXX - why not | assertions here as well? */ | 1966 new_set[s1 + s2].assertions = set2[s2].assertions; |
2088 new_set[s1 + s2].assertions = set2[s2].assertions; | 1967 new_set[s1 + s2].class = set2[s2].class; |
2089 new_set[s1 + s2].class = set2[s2].class; | 1968 new_set[s1 + s2].neg_classes = set2[s2].neg_classes; |
2090 new_set[s1 + s2].neg_classes = set2[s2].neg_classes; | 1969 new_set[s1 + s2].backref = set2[s2].backref; |
2091 new_set[s1 + s2].backref = set2[s2].backref; | 1970 if (set2[s2].tags == NULL) |
2092 if (set2[s2].tags == NULL) | 1971 new_set[s1 + s2].tags = NULL; |
2093 » new_set[s1 + s2].tags = NULL; | 1972 else { |
2094 else | 1973 for (i = 0; set2[s2].tags[i] >= 0; i++) |
2095 » { | 1974 ; |
2096 » for (i = 0; set2[s2].tags[i] >= 0; i++); | 1975 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1)); |
2097 » new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1)); | 1976 if (new_tags == NULL) |
2098 » if (new_tags == NULL) | 1977 return NULL; |
2099 » return NULL; | 1978 for (j = 0; j < i; j++) |
2100 » for (j = 0; j < i; j++) | 1979 new_tags[j] = set2[s2].tags[j]; |
2101 » new_tags[j] = set2[s2].tags[j]; | 1980 new_tags[j] = -1; |
2102 » new_tags[j] = -1; | 1981 new_set[s1 + s2].tags = new_tags; |
2103 » new_set[s1 + s2].tags = new_tags; | |
2104 » } | |
2105 } | 1982 } |
| 1983 } |
2106 new_set[s1 + s2].position = -1; | 1984 new_set[s1 + s2].position = -1; |
2107 return new_set; | 1985 return new_set; |
2108 } | 1986 } |
2109 | 1987 |
2110 /* Finds the empty path through `node' which is the one that should be | 1988 /* Finds the empty path through `node' which is the one that should be |
2111 taken according to POSIX.2 rules, and adds the tags on that path to | 1989 taken according to POSIX.2 rules, and adds the tags on that path to |
2112 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is | 1990 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is |
2113 set to the number of tags seen on the path. */ | 1991 set to the number of tags seen on the path. */ |
2114 static reg_errcode_t | 1992 static reg_errcode_t tre_match_empty(tre_stack_t* stack, |
2115 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags, | 1993 tre_ast_node_t* node, |
2116 » » int *assertions, int *num_tags_seen) | 1994 int* tags, |
2117 { | 1995 int* assertions, |
2118 tre_literal_t *lit; | 1996 int* num_tags_seen) { |
2119 tre_union_t *uni; | 1997 tre_literal_t* lit; |
2120 tre_catenation_t *cat; | 1998 tre_union_t* uni; |
2121 tre_iteration_t *iter; | 1999 tre_catenation_t* cat; |
| 2000 tre_iteration_t* iter; |
2122 int i; | 2001 int i; |
2123 int bottom = tre_stack_num_objects(stack); | 2002 int bottom = tre_stack_num_objects(stack); |
2124 reg_errcode_t status = REG_OK; | 2003 reg_errcode_t status = REG_OK; |
2125 if (num_tags_seen) | 2004 if (num_tags_seen) |
2126 *num_tags_seen = 0; | 2005 *num_tags_seen = 0; |
2127 | 2006 |
2128 status = tre_stack_push_voidptr(stack, node); | 2007 status = tre_stack_push_voidptr(stack, node); |
2129 | 2008 |
2130 /* Walk through the tree recursively. */ | 2009 /* Walk through the tree recursively. */ |
2131 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) | 2010 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) { |
2132 { | 2011 node = tre_stack_pop_voidptr(stack); |
2133 node = tre_stack_pop_voidptr(stack); | |
2134 | 2012 |
2135 switch (node->type) | 2013 switch (node->type) { |
2136 » { | 2014 case LITERAL: |
2137 » case LITERAL: | 2015 lit = (tre_literal_t*)node->obj; |
2138 » lit = (tre_literal_t *)node->obj; | 2016 switch (lit->code_min) { |
2139 » switch (lit->code_min) | 2017 case TAG: |
2140 » { | 2018 if (lit->code_max >= 0) { |
2141 » case TAG: | 2019 if (tags != NULL) { |
2142 » if (lit->code_max >= 0) | 2020 /* Add the tag to `tags'. */ |
2143 » » { | 2021 for (i = 0; tags[i] >= 0; i++) |
2144 » » if (tags != NULL) | 2022 if (tags[i] == lit->code_max) |
2145 » » { | 2023 break; |
2146 » » /* Add the tag to `tags'. */ | 2024 if (tags[i] < 0) { |
2147 » » for (i = 0; tags[i] >= 0; i++) | 2025 tags[i] = lit->code_max; |
2148 » » » if (tags[i] == lit->code_max) | 2026 tags[i + 1] = -1; |
2149 » » » break; | 2027 } |
2150 » » if (tags[i] < 0) | 2028 } |
2151 » » » { | 2029 if (num_tags_seen) |
2152 » » » tags[i] = lit->code_max; | 2030 (*num_tags_seen)++; |
2153 » » » tags[i + 1] = -1; | 2031 } |
2154 » » » } | 2032 break; |
2155 » » } | 2033 case ASSERTION: |
2156 » » if (num_tags_seen) | 2034 assert(lit->code_max >= 1 || lit->code_max <= ASSERT_LAST); |
2157 » » (*num_tags_seen)++; | 2035 if (assertions != NULL) |
2158 » » } | 2036 *assertions |= lit->code_max; |
2159 » break; | 2037 break; |
2160 » case ASSERTION: | 2038 case EMPTY: |
2161 » assert(lit->code_max >= 1 | 2039 break; |
2162 » » || lit->code_max <= ASSERT_LAST); | 2040 default: |
2163 » if (assertions != NULL) | 2041 assert(0); |
2164 » » *assertions |= lit->code_max; | 2042 break; |
2165 » break; | 2043 } |
2166 » case EMPTY: | 2044 break; |
2167 » break; | |
2168 » default: | |
2169 » assert(0); | |
2170 » break; | |
2171 » } | |
2172 » break; | |
2173 | 2045 |
2174 » case UNION: | 2046 case UNION: |
2175 » /* Subexpressions starting earlier take priority over ones | 2047 /* Subexpressions starting earlier take priority over ones |
2176 » starting later, so we prefer the left subexpression over the | 2048 starting later, so we prefer the left subexpression over the |
2177 » right subexpression. */ | 2049 right subexpression. */ |
2178 » uni = (tre_union_t *)node->obj; | 2050 uni = (tre_union_t*)node->obj; |
2179 » if (uni->left->nullable) | 2051 if (uni->left->nullable) |
2180 » STACK_PUSHX(stack, voidptr, uni->left) | 2052 STACK_PUSHX(stack, voidptr, uni->left) |
2181 » else if (uni->right->nullable) | 2053 else if (uni->right->nullable) |
2182 » STACK_PUSHX(stack, voidptr, uni->right) | 2054 STACK_PUSHX(stack, voidptr, uni->right) |
2183 » else | 2055 else |
2184 » assert(0); | 2056 assert(0); |
2185 » break; | 2057 break; |
2186 | 2058 |
2187 » case CATENATION: | 2059 case CATENATION: |
2188 » /* The path must go through both children. */ | 2060 /* The path must go through both children. */ |
2189 » cat = (tre_catenation_t *)node->obj; | 2061 cat = (tre_catenation_t*)node->obj; |
2190 » assert(cat->left->nullable); | 2062 assert(cat->left->nullable); |
2191 » assert(cat->right->nullable); | 2063 assert(cat->right->nullable); |
2192 » STACK_PUSHX(stack, voidptr, cat->left); | 2064 STACK_PUSHX(stack, voidptr, cat->left); |
2193 » STACK_PUSHX(stack, voidptr, cat->right); | 2065 STACK_PUSHX(stack, voidptr, cat->right); |
2194 » break; | 2066 break; |
2195 | 2067 |
2196 » case ITERATION: | 2068 case ITERATION: |
2197 » /* A match with an empty string is preferred over no match at | 2069 /* A match with an empty string is preferred over no match at |
2198 » all, so we go through the argument if possible. */ | 2070 all, so we go through the argument if possible. */ |
2199 » iter = (tre_iteration_t *)node->obj; | 2071 iter = (tre_iteration_t*)node->obj; |
2200 » if (iter->arg->nullable) | 2072 if (iter->arg->nullable) |
2201 » STACK_PUSHX(stack, voidptr, iter->arg); | 2073 STACK_PUSHX(stack, voidptr, iter->arg); |
2202 » break; | 2074 break; |
2203 | 2075 |
2204 » default: | 2076 default: |
2205 » assert(0); | 2077 assert(0); |
2206 » break; | 2078 break; |
2207 » } | |
2208 } | 2079 } |
| 2080 } |
2209 | 2081 |
2210 return status; | 2082 return status; |
2211 } | 2083 } |
2212 | 2084 |
2213 | |
2214 typedef enum { | 2085 typedef enum { |
2215 NFL_RECURSE, | 2086 NFL_RECURSE, |
2216 NFL_POST_UNION, | 2087 NFL_POST_UNION, |
2217 NFL_POST_CATENATION, | 2088 NFL_POST_CATENATION, |
2218 NFL_POST_ITERATION | 2089 NFL_POST_ITERATION |
2219 } tre_nfl_stack_symbol_t; | 2090 } tre_nfl_stack_symbol_t; |
2220 | 2091 |
2221 | |
2222 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for | 2092 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for |
2223 the nodes of the AST `tree'. */ | 2093 the nodes of the AST `tree'. */ |
2224 static reg_errcode_t | 2094 static reg_errcode_t tre_compute_nfl(tre_mem_t mem, |
2225 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) | 2095 tre_stack_t* stack, |
2226 { | 2096 tre_ast_node_t* tree) { |
2227 int bottom = tre_stack_num_objects(stack); | 2097 int bottom = tre_stack_num_objects(stack); |
2228 | 2098 |
2229 STACK_PUSHR(stack, voidptr, tree); | 2099 STACK_PUSHR(stack, voidptr, tree); |
2230 STACK_PUSHR(stack, int, NFL_RECURSE); | 2100 STACK_PUSHR(stack, int, NFL_RECURSE); |
2231 | 2101 |
2232 while (tre_stack_num_objects(stack) > bottom) | 2102 while (tre_stack_num_objects(stack) > bottom) { |
2233 { | 2103 tre_nfl_stack_symbol_t symbol; |
2234 tre_nfl_stack_symbol_t symbol; | 2104 tre_ast_node_t* node; |
2235 tre_ast_node_t *node; | 2105 |
2236 | 2106 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack); |
2237 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack); | 2107 node = tre_stack_pop_voidptr(stack); |
2238 node = tre_stack_pop_voidptr(stack); | 2108 switch (symbol) { |
2239 switch (symbol) | 2109 case NFL_RECURSE: |
2240 { | 2110 switch (node->type) { |
2241 case NFL_RECURSE: | 2111 case LITERAL: { |
2242 switch (node->type) | 2112 tre_literal_t* lit = (tre_literal_t*)node->obj; |
2243 { | 2113 if (IS_BACKREF(lit)) { |
2244 case LITERAL: | 2114 /* Back references: nullable = false, firstpos = {i}, |
2245 { | 2115 lastpos = {i}. */ |
2246 tre_literal_t *lit = (tre_literal_t *)node->obj; | 2116 node->nullable = 0; |
2247 if (IS_BACKREF(lit)) | 2117 node->firstpos = |
2248 { | 2118 tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1); |
2249 /* Back references: nullable = false, firstpos = {i}, | 2119 if (!node->firstpos) |
2250 lastpos = {i}. */ | 2120 return REG_ESPACE; |
2251 node->nullable = 0; | 2121 node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, |
2252 node->firstpos = tre_set_one(mem, lit->position, 0, | 2122 0, NULL, (int)lit->code_max); |
2253 TRE_CHAR_MAX, 0, NULL, -1); | 2123 if (!node->lastpos) |
2254 if (!node->firstpos) | 2124 return REG_ESPACE; |
2255 return REG_ESPACE; | 2125 } else if (lit->code_min < 0) { |
2256 node->lastpos = tre_set_one(mem, lit->position, 0, | 2126 /* Tags, empty strings, params, and zero width assertions: |
2257 TRE_CHAR_MAX, 0, NULL, | 2127 nullable = true, firstpos = {}, and lastpos = {}. */ |
2258 (int)lit->code_max); | 2128 node->nullable = 1; |
2259 if (!node->lastpos) | 2129 node->firstpos = tre_set_empty(mem); |
2260 return REG_ESPACE; | 2130 if (!node->firstpos) |
2261 } | 2131 return REG_ESPACE; |
2262 else if (lit->code_min < 0) | 2132 node->lastpos = tre_set_empty(mem); |
2263 { | 2133 if (!node->lastpos) |
2264 /* Tags, empty strings, params, and zero width assertions: | 2134 return REG_ESPACE; |
2265 nullable = true, firstpos = {}, and lastpos = {}. */ | 2135 } else { |
2266 node->nullable = 1; | 2136 /* Literal at position i: nullable = false, firstpos = {i}, |
2267 node->firstpos = tre_set_empty(mem); | 2137 lastpos = {i}. */ |
2268 if (!node->firstpos) | 2138 node->nullable = 0; |
2269 return REG_ESPACE; | 2139 node->firstpos = |
2270 node->lastpos = tre_set_empty(mem); | 2140 tre_set_one(mem, lit->position, (int)lit->code_min, |
2271 if (!node->lastpos) | 2141 (int)lit->code_max, 0, NULL, -1); |
2272 return REG_ESPACE; | 2142 if (!node->firstpos) |
2273 } | 2143 return REG_ESPACE; |
2274 else | 2144 node->lastpos = tre_set_one( |
2275 { | 2145 mem, lit->position, (int)lit->code_min, (int)lit->code_max, |
2276 /* Literal at position i: nullable = false, firstpos = {i}, | 2146 lit->class, lit->neg_classes, -1); |
2277 lastpos = {i}. */ | 2147 if (!node->lastpos) |
2278 node->nullable = 0; | 2148 return REG_ESPACE; |
2279 node->firstpos = | 2149 } |
2280 tre_set_one(mem, lit->position, (int)lit->code_min, | 2150 break; |
2281 (int)lit->code_max, 0, NULL, -1); | 2151 } |
2282 if (!node->firstpos) | 2152 |
2283 return REG_ESPACE; | 2153 case UNION: |
2284 node->lastpos = tre_set_one(mem, lit->position, | 2154 /* Compute the attributes for the two subtrees, and after that |
2285 (int)lit->code_min, | 2155 for this node. */ |
2286 (int)lit->code_max, | 2156 STACK_PUSHR(stack, voidptr, node); |
2287 lit->class, lit->neg_classes, | 2157 STACK_PUSHR(stack, int, NFL_POST_UNION); |
2288 -1); | 2158 STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->right); |
2289 if (!node->lastpos) | 2159 STACK_PUSHR(stack, int, NFL_RECURSE); |
2290 return REG_ESPACE; | 2160 STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->left); |
2291 } | 2161 STACK_PUSHR(stack, int, NFL_RECURSE); |
2292 break; | 2162 break; |
2293 } | 2163 |
2294 | 2164 case CATENATION: |
2295 case UNION: | 2165 /* Compute the attributes for the two subtrees, and after that |
2296 /* Compute the attributes for the two subtrees, and after that | 2166 for this node. */ |
2297 for this node. */ | 2167 STACK_PUSHR(stack, voidptr, node); |
2298 STACK_PUSHR(stack, voidptr, node); | 2168 STACK_PUSHR(stack, int, NFL_POST_CATENATION); |
2299 STACK_PUSHR(stack, int, NFL_POST_UNION); | 2169 STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->right); |
2300 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right); | 2170 STACK_PUSHR(stack, int, NFL_RECURSE); |
2301 STACK_PUSHR(stack, int, NFL_RECURSE); | 2171 STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->left); |
2302 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left); | 2172 STACK_PUSHR(stack, int, NFL_RECURSE); |
2303 STACK_PUSHR(stack, int, NFL_RECURSE); | 2173 break; |
2304 break; | 2174 |
2305 | 2175 case ITERATION: |
2306 case CATENATION: | 2176 /* Compute the attributes for the subtree, and after that for |
2307 /* Compute the attributes for the two subtrees, and after that | 2177 this node. */ |
2308 for this node. */ | 2178 STACK_PUSHR(stack, voidptr, node); |
2309 STACK_PUSHR(stack, voidptr, node); | 2179 STACK_PUSHR(stack, int, NFL_POST_ITERATION); |
2310 STACK_PUSHR(stack, int, NFL_POST_CATENATION); | 2180 STACK_PUSHR(stack, voidptr, ((tre_iteration_t*)node->obj)->arg); |
2311 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right
); | 2181 STACK_PUSHR(stack, int, NFL_RECURSE); |
2312 STACK_PUSHR(stack, int, NFL_RECURSE); | 2182 break; |
2313 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left)
; | 2183 } |
2314 STACK_PUSHR(stack, int, NFL_RECURSE); | 2184 break; /* end case: NFL_RECURSE */ |
2315 break; | 2185 |
2316 | 2186 case NFL_POST_UNION: { |
2317 case ITERATION: | 2187 tre_union_t* uni = (tre_union_t*)node->obj; |
2318 /* Compute the attributes for the subtree, and after that for | 2188 node->nullable = uni->left->nullable || uni->right->nullable; |
2319 this node. */ | 2189 node->firstpos = tre_set_union(mem, uni->left->firstpos, |
2320 STACK_PUSHR(stack, voidptr, node); | 2190 uni->right->firstpos, NULL, 0); |
2321 STACK_PUSHR(stack, int, NFL_POST_ITERATION); | 2191 if (!node->firstpos) |
2322 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg); | 2192 return REG_ESPACE; |
2323 STACK_PUSHR(stack, int, NFL_RECURSE); | 2193 node->lastpos = tre_set_union(mem, uni->left->lastpos, |
2324 break; | 2194 uni->right->lastpos, NULL, 0); |
2325 } | 2195 if (!node->lastpos) |
2326 break; /* end case: NFL_RECURSE */ | 2196 return REG_ESPACE; |
2327 | 2197 break; |
2328 case NFL_POST_UNION: | 2198 } |
2329 { | 2199 |
2330 tre_union_t *uni = (tre_union_t *)node->obj; | 2200 case NFL_POST_ITERATION: { |
2331 node->nullable = uni->left->nullable || uni->right->nullable; | 2201 tre_iteration_t* iter = (tre_iteration_t*)node->obj; |
2332 node->firstpos = tre_set_union(mem, uni->left->firstpos, | 2202 |
2333 uni->right->firstpos, NULL, 0); | 2203 if (iter->min == 0 || iter->arg->nullable) |
2334 if (!node->firstpos) | 2204 node->nullable = 1; |
2335 return REG_ESPACE; | 2205 else |
2336 node->lastpos = tre_set_union(mem, uni->left->lastpos, | 2206 node->nullable = 0; |
2337 uni->right->lastpos, NULL, 0); | 2207 node->firstpos = iter->arg->firstpos; |
2338 if (!node->lastpos) | 2208 node->lastpos = iter->arg->lastpos; |
2339 return REG_ESPACE; | 2209 break; |
2340 break; | 2210 } |
2341 } | 2211 |
2342 | 2212 case NFL_POST_CATENATION: { |
2343 case NFL_POST_ITERATION: | 2213 int num_tags, *tags, assertions; |
2344 { | 2214 reg_errcode_t status; |
2345 tre_iteration_t *iter = (tre_iteration_t *)node->obj; | 2215 tre_catenation_t* cat = node->obj; |
2346 | 2216 node->nullable = cat->left->nullable && cat->right->nullable; |
2347 if (iter->min == 0 || iter->arg->nullable) | 2217 |
2348 node->nullable = 1; | 2218 /* Compute firstpos. */ |
2349 else | 2219 if (cat->left->nullable) { |
2350 node->nullable = 0; | 2220 /* The left side matches the empty string. Make a first pass |
2351 node->firstpos = iter->arg->firstpos; | 2221 with tre_match_empty() to get the number of tags and |
2352 node->lastpos = iter->arg->lastpos; | 2222 parameters. */ |
2353 break; | 2223 status = tre_match_empty(stack, cat->left, NULL, NULL, &num_tags); |
2354 } | 2224 if (status != REG_OK) |
2355 | 2225 return status; |
2356 case NFL_POST_CATENATION: | 2226 /* Allocate arrays for the tags and parameters. */ |
2357 { | 2227 tags = xmalloc(sizeof(*tags) * (num_tags + 1)); |
2358 int num_tags, *tags, assertions; | 2228 if (!tags) |
2359 reg_errcode_t status; | 2229 return REG_ESPACE; |
2360 tre_catenation_t *cat = node->obj; | 2230 tags[0] = -1; |
2361 node->nullable = cat->left->nullable && cat->right->nullable; | 2231 assertions = 0; |
2362 | 2232 /* Second pass with tre_mach_empty() to get the list of |
2363 /* Compute firstpos. */ | 2233 tags and parameters. */ |
2364 if (cat->left->nullable) | 2234 status = tre_match_empty(stack, cat->left, tags, &assertions, NULL); |
2365 { | 2235 if (status != REG_OK) { |
2366 /* The left side matches the empty string. Make a first pass | 2236 xfree(tags); |
2367 with tre_match_empty() to get the number of tags and | 2237 return status; |
2368 parameters. */ | 2238 } |
2369 status = tre_match_empty(stack, cat->left, | 2239 node->firstpos = tre_set_union(mem, cat->right->firstpos, |
2370 NULL, NULL, &num_tags); | 2240 cat->left->firstpos, tags, assertions); |
2371 if (status != REG_OK) | 2241 xfree(tags); |
2372 return status; | 2242 if (!node->firstpos) |
2373 /* Allocate arrays for the tags and parameters. */ | 2243 return REG_ESPACE; |
2374 tags = xmalloc(sizeof(*tags) * (num_tags + 1)); | 2244 } else { |
2375 if (!tags) | 2245 node->firstpos = cat->left->firstpos; |
2376 return REG_ESPACE; | 2246 } |
2377 tags[0] = -1; | 2247 |
2378 assertions = 0; | 2248 /* Compute lastpos. */ |
2379 /* Second pass with tre_mach_empty() to get the list of | 2249 if (cat->right->nullable) { |
2380 tags and parameters. */ | 2250 /* The right side matches the empty string. Make a first pass |
2381 status = tre_match_empty(stack, cat->left, tags, | 2251 with tre_match_empty() to get the number of tags and |
2382 &assertions, NULL); | 2252 parameters. */ |
2383 if (status != REG_OK) | 2253 status = tre_match_empty(stack, cat->right, NULL, NULL, &num_tags); |
2384 { | 2254 if (status != REG_OK) |
2385 xfree(tags); | 2255 return status; |
2386 return status; | 2256 /* Allocate arrays for the tags and parameters. */ |
2387 } | 2257 tags = xmalloc(sizeof(int) * (num_tags + 1)); |
2388 node->firstpos = | 2258 if (!tags) |
2389 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos, | 2259 return REG_ESPACE; |
2390 tags, assertions); | 2260 tags[0] = -1; |
2391 xfree(tags); | 2261 assertions = 0; |
2392 if (!node->firstpos) | 2262 /* Second pass with tre_mach_empty() to get the list of |
2393 return REG_ESPACE; | 2263 tags and parameters. */ |
2394 } | 2264 status = tre_match_empty(stack, cat->right, tags, &assertions, NULL); |
2395 else | 2265 if (status != REG_OK) { |
2396 { | 2266 xfree(tags); |
2397 node->firstpos = cat->left->firstpos; | 2267 return status; |
2398 } | 2268 } |
2399 | 2269 node->lastpos = tre_set_union(mem, cat->left->lastpos, |
2400 /* Compute lastpos. */ | 2270 cat->right->lastpos, tags, assertions); |
2401 if (cat->right->nullable) | 2271 xfree(tags); |
2402 { | 2272 if (!node->lastpos) |
2403 /* The right side matches the empty string. Make a first pass | 2273 return REG_ESPACE; |
2404 with tre_match_empty() to get the number of tags and | 2274 } else { |
2405 parameters. */ | 2275 node->lastpos = cat->right->lastpos; |
2406 status = tre_match_empty(stack, cat->right, | 2276 } |
2407 NULL, NULL, &num_tags); | 2277 break; |
2408 if (status != REG_OK) | 2278 } |
2409 return status; | 2279 |
2410 /* Allocate arrays for the tags and parameters. */ | 2280 default: |
2411 tags = xmalloc(sizeof(int) * (num_tags + 1)); | 2281 assert(0); |
2412 if (!tags) | 2282 break; |
2413 return REG_ESPACE; | |
2414 tags[0] = -1; | |
2415 assertions = 0; | |
2416 /* Second pass with tre_mach_empty() to get the list of | |
2417 tags and parameters. */ | |
2418 status = tre_match_empty(stack, cat->right, tags, | |
2419 &assertions, NULL); | |
2420 if (status != REG_OK) | |
2421 { | |
2422 xfree(tags); | |
2423 return status; | |
2424 } | |
2425 node->lastpos = | |
2426 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos, | |
2427 tags, assertions); | |
2428 xfree(tags); | |
2429 if (!node->lastpos) | |
2430 return REG_ESPACE; | |
2431 } | |
2432 else | |
2433 { | |
2434 node->lastpos = cat->right->lastpos; | |
2435 } | |
2436 break; | |
2437 } | |
2438 | |
2439 default: | |
2440 assert(0); | |
2441 break; | |
2442 } | |
2443 } | 2283 } |
| 2284 } |
2444 | 2285 |
2445 return REG_OK; | 2286 return REG_OK; |
2446 } | 2287 } |
2447 | 2288 |
2448 | |
2449 /* Adds a transition from each position in `p1' to each position in `p2'. */ | 2289 /* Adds a transition from each position in `p1' to each position in `p2'. */ |
2450 static reg_errcode_t | 2290 static reg_errcode_t tre_make_trans(tre_pos_and_tags_t* p1, |
2451 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, | 2291 tre_pos_and_tags_t* p2, |
2452 » tre_tnfa_transition_t *transitions, | 2292 tre_tnfa_transition_t* transitions, |
2453 » int *counts, int *offs) | 2293 int* counts, |
2454 { | 2294 int* offs) { |
2455 tre_pos_and_tags_t *orig_p2 = p2; | 2295 tre_pos_and_tags_t* orig_p2 = p2; |
2456 tre_tnfa_transition_t *trans; | 2296 tre_tnfa_transition_t* trans; |
2457 int i, j, k, l, dup, prev_p2_pos; | 2297 int i, j, k, l, dup, prev_p2_pos; |
2458 | 2298 |
2459 if (transitions != NULL) | 2299 if (transitions != NULL) |
2460 while (p1->position >= 0) | 2300 while (p1->position >= 0) { |
2461 { | 2301 p2 = orig_p2; |
2462 » p2 = orig_p2; | 2302 prev_p2_pos = -1; |
2463 » prev_p2_pos = -1; | 2303 while (p2->position >= 0) { |
2464 » while (p2->position >= 0) | 2304 /* Optimization: if this position was already handled, skip it. */ |
2465 » { | 2305 if (p2->position == prev_p2_pos) { |
2466 » /* Optimization: if this position was already handled, skip it. */ | 2306 p2++; |
2467 » if (p2->position == prev_p2_pos) | 2307 continue; |
2468 » { | 2308 } |
2469 » » p2++; | 2309 prev_p2_pos = p2->position; |
2470 » » continue; | 2310 /* Set `trans' to point to the next unused transition from |
2471 » } | 2311 position `p1->position'. */ |
2472 » prev_p2_pos = p2->position; | 2312 trans = transitions + offs[p1->position]; |
2473 » /* Set `trans' to point to the next unused transition from | 2313 while (trans->state != NULL) { |
2474 » position `p1->position'. */ | |
2475 » trans = transitions + offs[p1->position]; | |
2476 » while (trans->state != NULL) | |
2477 » { | |
2478 #if 0 | 2314 #if 0 |
2479 /* If we find a previous transition from `p1->position' to | 2315 /* If we find a previous transition from `p1->position' to |
2480 `p2->position', it is overwritten. This can happen only | 2316 `p2->position', it is overwritten. This can happen only |
2481 if there are nested loops in the regexp, like in "((a)*)*". | 2317 if there are nested loops in the regexp, like in "((a)*)*". |
2482 In POSIX.2 repetition using the outer loop is always | 2318 In POSIX.2 repetition using the outer loop is always |
2483 preferred over using the inner loop. Therefore the | 2319 preferred over using the inner loop. Therefore the |
2484 transition for the inner loop is useless and can be thrown | 2320 transition for the inner loop is useless and can be thrown |
2485 away. */ | 2321 away. */ |
2486 /* XXX - The same position is used for all nodes in a bracket | 2322 /* XXX - The same position is used for all nodes in a bracket |
2487 expression, so this optimization cannot be used (it will | 2323 expression, so this optimization cannot be used (it will |
2488 break bracket expressions) unless I figure out a way to | 2324 break bracket expressions) unless I figure out a way to |
2489 detect it here. */ | 2325 detect it here. */ |
2490 if (trans->state_id == p2->position) | 2326 if (trans->state_id == p2->position) |
2491 { | 2327 { |
2492 break; | 2328 break; |
2493 } | 2329 } |
2494 #endif | 2330 #endif |
2495 » » trans++; | 2331 trans++; |
2496 » } | 2332 } |
2497 | 2333 |
2498 » if (trans->state == NULL) | 2334 if (trans->state == NULL) |
2499 » (trans + 1)->state = NULL; | 2335 (trans + 1)->state = NULL; |
2500 » /* Use the character ranges, assertions, etc. from `p1' for | 2336 /* Use the character ranges, assertions, etc. from `p1' for |
2501 » the transition from `p1' to `p2'. */ | 2337 the transition from `p1' to `p2'. */ |
2502 » trans->code_min = p1->code_min; | 2338 trans->code_min = p1->code_min; |
2503 » trans->code_max = p1->code_max; | 2339 trans->code_max = p1->code_max; |
2504 » trans->state = transitions + offs[p2->position]; | 2340 trans->state = transitions + offs[p2->position]; |
2505 » trans->state_id = p2->position; | 2341 trans->state_id = p2->position; |
2506 » trans->assertions = p1->assertions | p2->assertions | 2342 trans->assertions = |
2507 » | (p1->class ? ASSERT_CHAR_CLASS : 0) | 2343 p1->assertions | p2->assertions | |
2508 » | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0); | 2344 (p1->class ? ASSERT_CHAR_CLASS : 0) | |
2509 » if (p1->backref >= 0) | 2345 (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0); |
2510 » { | 2346 if (p1->backref >= 0) { |
2511 » » assert((trans->assertions & ASSERT_CHAR_CLASS) == 0); | 2347 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0); |
2512 » » assert(p2->backref < 0); | 2348 assert(p2->backref < 0); |
2513 » » trans->u.backref = p1->backref; | 2349 trans->u.backref = p1->backref; |
2514 » » trans->assertions |= ASSERT_BACKREF; | 2350 trans->assertions |= ASSERT_BACKREF; |
2515 » } | 2351 } else |
2516 » else | 2352 trans->u.class = p1->class; |
2517 » trans->u.class = p1->class; | 2353 if (p1->neg_classes != NULL) { |
2518 » if (p1->neg_classes != NULL) | 2354 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) |
2519 » { | 2355 ; |
2520 » » for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++); | 2356 trans->neg_classes = xmalloc(sizeof(*trans->neg_classes) * (i + 1)); |
2521 » » trans->neg_classes = | 2357 if (trans->neg_classes == NULL) |
2522 » » xmalloc(sizeof(*trans->neg_classes) * (i + 1)); | 2358 return REG_ESPACE; |
2523 » » if (trans->neg_classes == NULL) | 2359 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) |
2524 » » return REG_ESPACE; | 2360 trans->neg_classes[i] = p1->neg_classes[i]; |
2525 » » for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) | 2361 trans->neg_classes[i] = (tre_ctype_t)0; |
2526 » » trans->neg_classes[i] = p1->neg_classes[i]; | 2362 } else |
2527 » » trans->neg_classes[i] = (tre_ctype_t)0; | 2363 trans->neg_classes = NULL; |
2528 » } | |
2529 » else | |
2530 » trans->neg_classes = NULL; | |
2531 | 2364 |
2532 » /* Find out how many tags this transition has. */ | 2365 /* Find out how many tags this transition has. */ |
2533 » i = 0; | 2366 i = 0; |
2534 » if (p1->tags != NULL) | 2367 if (p1->tags != NULL) |
2535 » while(p1->tags[i] >= 0) | 2368 while (p1->tags[i] >= 0) |
2536 » » i++; | 2369 i++; |
2537 » j = 0; | 2370 j = 0; |
2538 » if (p2->tags != NULL) | 2371 if (p2->tags != NULL) |
2539 » while(p2->tags[j] >= 0) | 2372 while (p2->tags[j] >= 0) |
2540 » » j++; | 2373 j++; |
2541 | 2374 |
2542 » /* If we are overwriting a transition, free the old tag array. */ | 2375 /* If we are overwriting a transition, free the old tag array. */ |
2543 » if (trans->tags != NULL) | 2376 if (trans->tags != NULL) |
2544 » xfree(trans->tags); | 2377 xfree(trans->tags); |
2545 » trans->tags = NULL; | 2378 trans->tags = NULL; |
2546 | 2379 |
2547 » /* If there were any tags, allocate an array and fill it. */ | 2380 /* If there were any tags, allocate an array and fill it. */ |
2548 » if (i + j > 0) | 2381 if (i + j > 0) { |
2549 » { | 2382 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1)); |
2550 » » trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1)); | 2383 if (!trans->tags) |
2551 » » if (!trans->tags) | 2384 return REG_ESPACE; |
2552 » » return REG_ESPACE; | 2385 i = 0; |
2553 » » i = 0; | 2386 if (p1->tags != NULL) |
2554 » » if (p1->tags != NULL) | 2387 while (p1->tags[i] >= 0) { |
2555 » » while(p1->tags[i] >= 0) | 2388 trans->tags[i] = p1->tags[i]; |
2556 » » { | 2389 i++; |
2557 » » trans->tags[i] = p1->tags[i]; | 2390 } |
2558 » » i++; | 2391 l = i; |
2559 » » } | 2392 j = 0; |
2560 » » l = i; | 2393 if (p2->tags != NULL) |
2561 » » j = 0; | 2394 while (p2->tags[j] >= 0) { |
2562 » » if (p2->tags != NULL) | 2395 /* Don't add duplicates. */ |
2563 » » while (p2->tags[j] >= 0) | 2396 dup = 0; |
2564 » » { | 2397 for (k = 0; k < i; k++) |
2565 » » /* Don't add duplicates. */ | 2398 if (trans->tags[k] == p2->tags[j]) { |
2566 » » dup = 0; | 2399 dup = 1; |
2567 » » for (k = 0; k < i; k++) | 2400 break; |
2568 » » » if (trans->tags[k] == p2->tags[j]) | 2401 } |
2569 » » » { | 2402 if (!dup) |
2570 » » » dup = 1; | 2403 trans->tags[l++] = p2->tags[j]; |
2571 » » » break; | 2404 j++; |
2572 » » » } | 2405 } |
2573 » » if (!dup) | 2406 trans->tags[l] = -1; |
2574 » » » trans->tags[l++] = p2->tags[j]; | 2407 } |
2575 » » j++; | |
2576 » » } | |
2577 » » trans->tags[l] = -1; | |
2578 » } | |
2579 | 2408 |
2580 » p2++; | 2409 p2++; |
2581 » } | |
2582 » p1++; | |
2583 } | 2410 } |
| 2411 p1++; |
| 2412 } |
2584 else | 2413 else |
2585 /* Compute a maximum limit for the number of transitions leaving | 2414 /* Compute a maximum limit for the number of transitions leaving |
2586 from each state. */ | 2415 from each state. */ |
2587 while (p1->position >= 0) | 2416 while (p1->position >= 0) { |
2588 { | 2417 p2 = orig_p2; |
2589 » p2 = orig_p2; | 2418 while (p2->position >= 0) { |
2590 » while (p2->position >= 0) | 2419 counts[p1->position]++; |
2591 » { | 2420 p2++; |
2592 » counts[p1->position]++; | |
2593 » p2++; | |
2594 » } | |
2595 » p1++; | |
2596 } | 2421 } |
| 2422 p1++; |
| 2423 } |
2597 return REG_OK; | 2424 return REG_OK; |
2598 } | 2425 } |
2599 | 2426 |
2600 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are | 2427 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are |
2601 labelled with one character range (there are no transitions on empty | 2428 labelled with one character range (there are no transitions on empty |
2602 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of | 2429 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of |
2603 the regexp. */ | 2430 the regexp. */ |
2604 static reg_errcode_t | 2431 static reg_errcode_t tre_ast_to_tnfa(tre_ast_node_t* node, |
2605 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions, | 2432 tre_tnfa_transition_t* transitions, |
2606 » » int *counts, int *offs) | 2433 int* counts, |
2607 { | 2434 int* offs) { |
2608 tre_union_t *uni; | 2435 tre_union_t* uni; |
2609 tre_catenation_t *cat; | 2436 tre_catenation_t* cat; |
2610 tre_iteration_t *iter; | 2437 tre_iteration_t* iter; |
2611 reg_errcode_t errcode = REG_OK; | 2438 reg_errcode_t errcode = REG_OK; |
2612 | 2439 |
2613 /* XXX - recurse using a stack!. */ | 2440 /* XXX - recurse using a stack!. */ |
2614 switch (node->type) | 2441 switch (node->type) { |
2615 { | |
2616 case LITERAL: | 2442 case LITERAL: |
2617 break; | 2443 break; |
2618 case UNION: | 2444 case UNION: |
2619 uni = (tre_union_t *)node->obj; | 2445 uni = (tre_union_t*)node->obj; |
2620 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs); | 2446 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs); |
2621 if (errcode != REG_OK) | 2447 if (errcode != REG_OK) |
2622 » return errcode; | 2448 return errcode; |
2623 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs); | 2449 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs); |
2624 break; | 2450 break; |
2625 | 2451 |
2626 case CATENATION: | 2452 case CATENATION: |
2627 cat = (tre_catenation_t *)node->obj; | 2453 cat = (tre_catenation_t*)node->obj; |
2628 /* Add a transition from each position in cat->left->lastpos | 2454 /* Add a transition from each position in cat->left->lastpos |
2629 » to each position in cat->right->firstpos. */ | 2455 to each position in cat->right->firstpos. */ |
2630 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos, | 2456 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos, |
2631 » » » transitions, counts, offs); | 2457 transitions, counts, offs); |
2632 if (errcode != REG_OK) | 2458 if (errcode != REG_OK) |
2633 » return errcode; | 2459 return errcode; |
2634 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs); | 2460 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs); |
2635 if (errcode != REG_OK) | 2461 if (errcode != REG_OK) |
2636 » return errcode; | 2462 return errcode; |
2637 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs); | 2463 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs); |
2638 break; | 2464 break; |
2639 | 2465 |
2640 case ITERATION: | 2466 case ITERATION: |
2641 iter = (tre_iteration_t *)node->obj; | 2467 iter = (tre_iteration_t*)node->obj; |
2642 assert(iter->max == -1 || iter->max == 1); | 2468 assert(iter->max == -1 || iter->max == 1); |
2643 | 2469 |
2644 if (iter->max == -1) | 2470 if (iter->max == -1) { |
2645 » { | 2471 assert(iter->min == 0 || iter->min == 1); |
2646 » assert(iter->min == 0 || iter->min == 1); | 2472 /* Add a transition from each last position in the iterated |
2647 » /* Add a transition from each last position in the iterated | 2473 expression to each first position. */ |
2648 » expression to each first position. */ | 2474 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos, |
2649 » errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos, | 2475 transitions, counts, offs); |
2650 » » » » transitions, counts, offs); | 2476 if (errcode != REG_OK) |
2651 » if (errcode != REG_OK) | 2477 return errcode; |
2652 » return errcode; | 2478 } |
2653 » } | |
2654 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs); | 2479 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs); |
2655 break; | 2480 break; |
2656 } | 2481 } |
2657 return errcode; | 2482 return errcode; |
2658 } | 2483 } |
2659 | 2484 |
| 2485 #define ERROR_EXIT(err) \ |
| 2486 do { \ |
| 2487 errcode = err; \ |
| 2488 if (/*CONSTCOND*/ 1) \ |
| 2489 goto error_exit; \ |
| 2490 } while (/*CONSTCOND*/ 0) |
2660 | 2491 |
2661 #define ERROR_EXIT(err)»» \ | 2492 int regcomp(regex_t* restrict preg, const char* restrict regex, int cflags) { |
2662 do» » » » \ | 2493 tre_stack_t* stack; |
2663 {» » » » \ | |
2664 errcode = err;» » \ | |
2665 if (/*CONSTCOND*/1)» \ | |
2666 » goto error_exit;» \ | |
2667 }» » » » \ | |
2668 while (/*CONSTCOND*/0) | |
2669 | |
2670 | |
2671 int | |
2672 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags) | |
2673 { | |
2674 tre_stack_t *stack; | |
2675 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; | 2494 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; |
2676 tre_pos_and_tags_t *p; | 2495 tre_pos_and_tags_t* p; |
2677 int *counts = NULL, *offs = NULL; | 2496 int *counts = NULL, *offs = NULL; |
2678 int i, add = 0; | 2497 int i, add = 0; |
2679 tre_tnfa_transition_t *transitions, *initial; | 2498 tre_tnfa_transition_t *transitions, *initial; |
2680 tre_tnfa_t *tnfa = NULL; | 2499 tre_tnfa_t* tnfa = NULL; |
2681 tre_submatch_data_t *submatch_data; | 2500 tre_submatch_data_t* submatch_data; |
2682 tre_tag_direction_t *tag_directions = NULL; | 2501 tre_tag_direction_t* tag_directions = NULL; |
2683 reg_errcode_t errcode; | 2502 reg_errcode_t errcode; |
2684 tre_mem_t mem; | 2503 tre_mem_t mem; |
2685 | 2504 |
2686 /* Parse context. */ | 2505 /* Parse context. */ |
2687 tre_parse_ctx_t parse_ctx; | 2506 tre_parse_ctx_t parse_ctx; |
2688 | 2507 |
2689 /* Allocate a stack used throughout the compilation process for various | 2508 /* Allocate a stack used throughout the compilation process for various |
2690 purposes. */ | 2509 purposes. */ |
2691 stack = tre_stack_new(512, 1024000, 128); | 2510 stack = tre_stack_new(512, 1024000, 128); |
2692 if (!stack) | 2511 if (!stack) |
2693 return REG_ESPACE; | 2512 return REG_ESPACE; |
2694 /* Allocate a fast memory allocator. */ | 2513 /* Allocate a fast memory allocator. */ |
2695 mem = tre_mem_new(); | 2514 mem = tre_mem_new(); |
2696 if (!mem) | 2515 if (!mem) { |
2697 { | 2516 tre_stack_destroy(stack); |
2698 tre_stack_destroy(stack); | 2517 return REG_ESPACE; |
2699 return REG_ESPACE; | 2518 } |
2700 } | |
2701 | 2519 |
2702 /* Parse the regexp. */ | 2520 /* Parse the regexp. */ |
2703 memset(&parse_ctx, 0, sizeof(parse_ctx)); | 2521 memset(&parse_ctx, 0, sizeof(parse_ctx)); |
2704 parse_ctx.mem = mem; | 2522 parse_ctx.mem = mem; |
2705 parse_ctx.stack = stack; | 2523 parse_ctx.stack = stack; |
2706 parse_ctx.re = regex; | 2524 parse_ctx.re = regex; |
2707 parse_ctx.cflags = cflags; | 2525 parse_ctx.cflags = cflags; |
2708 parse_ctx.max_backref = -1; | 2526 parse_ctx.max_backref = -1; |
2709 errcode = tre_parse(&parse_ctx); | 2527 errcode = tre_parse(&parse_ctx); |
2710 if (errcode != REG_OK) | 2528 if (errcode != REG_OK) |
(...skipping 12 matching lines...) Expand all Loading... |
2723 /* Allocate the TNFA struct. */ | 2541 /* Allocate the TNFA struct. */ |
2724 tnfa = xcalloc(1, sizeof(tre_tnfa_t)); | 2542 tnfa = xcalloc(1, sizeof(tre_tnfa_t)); |
2725 if (tnfa == NULL) | 2543 if (tnfa == NULL) |
2726 ERROR_EXIT(REG_ESPACE); | 2544 ERROR_EXIT(REG_ESPACE); |
2727 tnfa->have_backrefs = parse_ctx.max_backref >= 0; | 2545 tnfa->have_backrefs = parse_ctx.max_backref >= 0; |
2728 tnfa->have_approx = 0; | 2546 tnfa->have_approx = 0; |
2729 tnfa->num_submatches = parse_ctx.submatch_id; | 2547 tnfa->num_submatches = parse_ctx.submatch_id; |
2730 | 2548 |
2731 /* Set up tags for submatch addressing. If REG_NOSUB is set and the | 2549 /* Set up tags for submatch addressing. If REG_NOSUB is set and the |
2732 regexp does not have back references, this can be skipped. */ | 2550 regexp does not have back references, this can be skipped. */ |
2733 if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) | 2551 if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) { |
2734 { | 2552 /* Figure out how many tags we will need. */ |
| 2553 errcode = tre_add_tags(NULL, stack, tree, tnfa); |
| 2554 if (errcode != REG_OK) |
| 2555 ERROR_EXIT(errcode); |
2735 | 2556 |
2736 /* Figure out how many tags we will need. */ | 2557 if (tnfa->num_tags > 0) { |
2737 errcode = tre_add_tags(NULL, stack, tree, tnfa); | 2558 tag_directions = xmalloc(sizeof(*tag_directions) * (tnfa->num_tags + 1)); |
2738 if (errcode != REG_OK) | 2559 if (tag_directions == NULL) |
2739 » ERROR_EXIT(errcode); | 2560 ERROR_EXIT(REG_ESPACE); |
| 2561 tnfa->tag_directions = tag_directions; |
| 2562 memset(tag_directions, -1, |
| 2563 sizeof(*tag_directions) * (tnfa->num_tags + 1)); |
| 2564 } |
| 2565 tnfa->minimal_tags = |
| 2566 xcalloc((unsigned)tnfa->num_tags * 2 + 1, sizeof(*tnfa->minimal_tags)); |
| 2567 if (tnfa->minimal_tags == NULL) |
| 2568 ERROR_EXIT(REG_ESPACE); |
2740 | 2569 |
2741 if (tnfa->num_tags > 0) | 2570 submatch_data = |
2742 » { | 2571 xcalloc((unsigned)parse_ctx.submatch_id, sizeof(*submatch_data)); |
2743 » tag_directions = xmalloc(sizeof(*tag_directions) | 2572 if (submatch_data == NULL) |
2744 » » » » * (tnfa->num_tags + 1)); | 2573 ERROR_EXIT(REG_ESPACE); |
2745 » if (tag_directions == NULL) | 2574 tnfa->submatch_data = submatch_data; |
2746 » ERROR_EXIT(REG_ESPACE); | |
2747 » tnfa->tag_directions = tag_directions; | |
2748 » memset(tag_directions, -1, | |
2749 » » sizeof(*tag_directions) * (tnfa->num_tags + 1)); | |
2750 » } | |
2751 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1, | |
2752 » » » » sizeof(*tnfa->minimal_tags)); | |
2753 if (tnfa->minimal_tags == NULL) | |
2754 » ERROR_EXIT(REG_ESPACE); | |
2755 | 2575 |
2756 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id, | 2576 errcode = tre_add_tags(mem, stack, tree, tnfa); |
2757 » » » sizeof(*submatch_data)); | 2577 if (errcode != REG_OK) |
2758 if (submatch_data == NULL) | 2578 ERROR_EXIT(errcode); |
2759 » ERROR_EXIT(REG_ESPACE); | 2579 } |
2760 tnfa->submatch_data = submatch_data; | |
2761 | |
2762 errcode = tre_add_tags(mem, stack, tree, tnfa); | |
2763 if (errcode != REG_OK) | |
2764 » ERROR_EXIT(errcode); | |
2765 | |
2766 } | |
2767 | 2580 |
2768 /* Expand iteration nodes. */ | 2581 /* Expand iteration nodes. */ |
2769 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position, | 2582 errcode = |
2770 » » » tag_directions); | 2583 tre_expand_ast(mem, stack, tree, &parse_ctx.position, tag_directions); |
2771 if (errcode != REG_OK) | 2584 if (errcode != REG_OK) |
2772 ERROR_EXIT(errcode); | 2585 ERROR_EXIT(errcode); |
2773 | 2586 |
2774 /* Add a dummy node for the final state. | 2587 /* Add a dummy node for the final state. |
2775 XXX - For certain patterns this dummy node can be optimized away, | 2588 XXX - For certain patterns this dummy node can be optimized away, |
2776 » for example "a*" or "ab*".» Figure out a simple way to detect | 2589 for example "a*" or "ab*".» Figure out a simple way to detect |
2777 » this possibility. */ | 2590 this possibility. */ |
2778 tmp_ast_l = tree; | 2591 tmp_ast_l = tree; |
2779 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++); | 2592 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++); |
2780 if (tmp_ast_r == NULL) | 2593 if (tmp_ast_r == NULL) |
2781 ERROR_EXIT(REG_ESPACE); | 2594 ERROR_EXIT(REG_ESPACE); |
2782 | 2595 |
2783 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); | 2596 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); |
2784 if (tree == NULL) | 2597 if (tree == NULL) |
2785 ERROR_EXIT(REG_ESPACE); | 2598 ERROR_EXIT(REG_ESPACE); |
2786 | 2599 |
2787 errcode = tre_compute_nfl(mem, stack, tree); | 2600 errcode = tre_compute_nfl(mem, stack, tree); |
2788 if (errcode != REG_OK) | 2601 if (errcode != REG_OK) |
2789 ERROR_EXIT(errcode); | 2602 ERROR_EXIT(errcode); |
2790 | 2603 |
2791 counts = xmalloc(sizeof(int) * parse_ctx.position); | 2604 counts = xmalloc(sizeof(int) * parse_ctx.position); |
2792 if (counts == NULL) | 2605 if (counts == NULL) |
2793 ERROR_EXIT(REG_ESPACE); | 2606 ERROR_EXIT(REG_ESPACE); |
2794 | 2607 |
2795 offs = xmalloc(sizeof(int) * parse_ctx.position); | 2608 offs = xmalloc(sizeof(int) * parse_ctx.position); |
2796 if (offs == NULL) | 2609 if (offs == NULL) |
2797 ERROR_EXIT(REG_ESPACE); | 2610 ERROR_EXIT(REG_ESPACE); |
2798 | 2611 |
2799 for (i = 0; i < parse_ctx.position; i++) | 2612 for (i = 0; i < parse_ctx.position; i++) |
2800 counts[i] = 0; | 2613 counts[i] = 0; |
2801 tre_ast_to_tnfa(tree, NULL, counts, NULL); | 2614 tre_ast_to_tnfa(tree, NULL, counts, NULL); |
2802 | 2615 |
2803 add = 0; | 2616 add = 0; |
2804 for (i = 0; i < parse_ctx.position; i++) | 2617 for (i = 0; i < parse_ctx.position; i++) { |
2805 { | 2618 offs[i] = add; |
2806 offs[i] = add; | 2619 add += counts[i] + 1; |
2807 add += counts[i] + 1; | 2620 counts[i] = 0; |
2808 counts[i] = 0; | 2621 } |
2809 } | |
2810 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions)); | 2622 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions)); |
2811 if (transitions == NULL) | 2623 if (transitions == NULL) |
2812 ERROR_EXIT(REG_ESPACE); | 2624 ERROR_EXIT(REG_ESPACE); |
2813 tnfa->transitions = transitions; | 2625 tnfa->transitions = transitions; |
2814 tnfa->num_transitions = add; | 2626 tnfa->num_transitions = add; |
2815 | 2627 |
2816 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs); | 2628 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs); |
2817 if (errcode != REG_OK) | 2629 if (errcode != REG_OK) |
2818 ERROR_EXIT(errcode); | 2630 ERROR_EXIT(errcode); |
2819 | 2631 |
2820 tnfa->firstpos_chars = NULL; | 2632 tnfa->firstpos_chars = NULL; |
2821 | 2633 |
2822 p = tree->firstpos; | 2634 p = tree->firstpos; |
2823 i = 0; | 2635 i = 0; |
2824 while (p->position >= 0) | 2636 while (p->position >= 0) { |
2825 { | 2637 i++; |
2826 i++; | 2638 p++; |
2827 p++; | 2639 } |
2828 } | |
2829 | 2640 |
2830 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t)); | 2641 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t)); |
2831 if (initial == NULL) | 2642 if (initial == NULL) |
2832 ERROR_EXIT(REG_ESPACE); | 2643 ERROR_EXIT(REG_ESPACE); |
2833 tnfa->initial = initial; | 2644 tnfa->initial = initial; |
2834 | 2645 |
2835 i = 0; | 2646 i = 0; |
2836 for (p = tree->firstpos; p->position >= 0; p++) | 2647 for (p = tree->firstpos; p->position >= 0; p++) { |
2837 { | 2648 initial[i].state = transitions + offs[p->position]; |
2838 initial[i].state = transitions + offs[p->position]; | 2649 initial[i].state_id = p->position; |
2839 initial[i].state_id = p->position; | 2650 initial[i].tags = NULL; |
2840 initial[i].tags = NULL; | 2651 /* Copy the arrays p->tags, and p->params, they are allocated |
2841 /* Copy the arrays p->tags, and p->params, they are allocated | 2652 from a tre_mem object. */ |
2842 » from a tre_mem object. */ | 2653 if (p->tags) { |
2843 if (p->tags) | 2654 int j; |
2844 » { | 2655 for (j = 0; p->tags[j] >= 0; j++) |
2845 » int j; | 2656 ; |
2846 » for (j = 0; p->tags[j] >= 0; j++); | 2657 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1)); |
2847 » initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1)); | 2658 if (!initial[i].tags) |
2848 » if (!initial[i].tags) | 2659 ERROR_EXIT(REG_ESPACE); |
2849 » ERROR_EXIT(REG_ESPACE); | 2660 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1)); |
2850 » memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1)); | |
2851 » } | |
2852 initial[i].assertions = p->assertions; | |
2853 i++; | |
2854 } | 2661 } |
| 2662 initial[i].assertions = p->assertions; |
| 2663 i++; |
| 2664 } |
2855 initial[i].state = NULL; | 2665 initial[i].state = NULL; |
2856 | 2666 |
2857 tnfa->num_transitions = add; | 2667 tnfa->num_transitions = add; |
2858 tnfa->final = transitions + offs[tree->lastpos[0].position]; | 2668 tnfa->final = transitions + offs[tree->lastpos[0].position]; |
2859 tnfa->num_states = parse_ctx.position; | 2669 tnfa->num_states = parse_ctx.position; |
2860 tnfa->cflags = cflags; | 2670 tnfa->cflags = cflags; |
2861 | 2671 |
2862 tre_mem_destroy(mem); | 2672 tre_mem_destroy(mem); |
2863 tre_stack_destroy(stack); | 2673 tre_stack_destroy(stack); |
2864 xfree(counts); | 2674 xfree(counts); |
2865 xfree(offs); | 2675 xfree(offs); |
2866 | 2676 |
2867 preg->TRE_REGEX_T_FIELD = (void *)tnfa; | 2677 preg->TRE_REGEX_T_FIELD = (void*)tnfa; |
2868 return REG_OK; | 2678 return REG_OK; |
2869 | 2679 |
2870 error_exit: | 2680 error_exit: |
2871 /* Free everything that was allocated and return the error code. */ | 2681 /* Free everything that was allocated and return the error code. */ |
2872 tre_mem_destroy(mem); | 2682 tre_mem_destroy(mem); |
2873 if (stack != NULL) | 2683 if (stack != NULL) |
2874 tre_stack_destroy(stack); | 2684 tre_stack_destroy(stack); |
2875 if (counts != NULL) | 2685 if (counts != NULL) |
2876 xfree(counts); | 2686 xfree(counts); |
2877 if (offs != NULL) | 2687 if (offs != NULL) |
2878 xfree(offs); | 2688 xfree(offs); |
2879 preg->TRE_REGEX_T_FIELD = (void *)tnfa; | 2689 preg->TRE_REGEX_T_FIELD = (void*)tnfa; |
2880 regfree(preg); | 2690 regfree(preg); |
2881 return errcode; | 2691 return errcode; |
2882 } | 2692 } |
2883 | 2693 |
| 2694 void regfree(regex_t* preg) { |
| 2695 tre_tnfa_t* tnfa; |
| 2696 unsigned int i; |
| 2697 tre_tnfa_transition_t* trans; |
2884 | 2698 |
2885 | 2699 tnfa = (void*)preg->TRE_REGEX_T_FIELD; |
2886 | |
2887 void | |
2888 regfree(regex_t *preg) | |
2889 { | |
2890 tre_tnfa_t *tnfa; | |
2891 unsigned int i; | |
2892 tre_tnfa_transition_t *trans; | |
2893 | |
2894 tnfa = (void *)preg->TRE_REGEX_T_FIELD; | |
2895 if (!tnfa) | 2700 if (!tnfa) |
2896 return; | 2701 return; |
2897 | 2702 |
2898 for (i = 0; i < tnfa->num_transitions; i++) | 2703 for (i = 0; i < tnfa->num_transitions; i++) |
2899 if (tnfa->transitions[i].state) | 2704 if (tnfa->transitions[i].state) { |
2900 { | 2705 if (tnfa->transitions[i].tags) |
2901 » if (tnfa->transitions[i].tags) | 2706 xfree(tnfa->transitions[i].tags); |
2902 » xfree(tnfa->transitions[i].tags); | 2707 if (tnfa->transitions[i].neg_classes) |
2903 » if (tnfa->transitions[i].neg_classes) | 2708 xfree(tnfa->transitions[i].neg_classes); |
2904 » xfree(tnfa->transitions[i].neg_classes); | 2709 } |
2905 } | |
2906 if (tnfa->transitions) | 2710 if (tnfa->transitions) |
2907 xfree(tnfa->transitions); | 2711 xfree(tnfa->transitions); |
2908 | 2712 |
2909 if (tnfa->initial) | 2713 if (tnfa->initial) { |
2910 { | 2714 for (trans = tnfa->initial; trans->state; trans++) { |
2911 for (trans = tnfa->initial; trans->state; trans++) | 2715 if (trans->tags) |
2912 » { | 2716 xfree(trans->tags); |
2913 » if (trans->tags) | |
2914 » xfree(trans->tags); | |
2915 » } | |
2916 xfree(tnfa->initial); | |
2917 } | 2717 } |
| 2718 xfree(tnfa->initial); |
| 2719 } |
2918 | 2720 |
2919 if (tnfa->submatch_data) | 2721 if (tnfa->submatch_data) { |
2920 { | 2722 for (i = 0; i < tnfa->num_submatches; i++) |
2921 for (i = 0; i < tnfa->num_submatches; i++) | 2723 if (tnfa->submatch_data[i].parents) |
2922 » if (tnfa->submatch_data[i].parents) | 2724 xfree(tnfa->submatch_data[i].parents); |
2923 » xfree(tnfa->submatch_data[i].parents); | 2725 xfree(tnfa->submatch_data); |
2924 xfree(tnfa->submatch_data); | 2726 } |
2925 } | |
2926 | 2727 |
2927 if (tnfa->tag_directions) | 2728 if (tnfa->tag_directions) |
2928 xfree(tnfa->tag_directions); | 2729 xfree(tnfa->tag_directions); |
2929 if (tnfa->firstpos_chars) | 2730 if (tnfa->firstpos_chars) |
2930 xfree(tnfa->firstpos_chars); | 2731 xfree(tnfa->firstpos_chars); |
2931 if (tnfa->minimal_tags) | 2732 if (tnfa->minimal_tags) |
2932 xfree(tnfa->minimal_tags); | 2733 xfree(tnfa->minimal_tags); |
2933 xfree(tnfa); | 2734 xfree(tnfa); |
2934 } | 2735 } |
OLD | NEW |