Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(155)

Side by Side Diff: fusl/src/regex/regcomp.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: headers too Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 regcomp.c - TRE POSIX compatible regex compilation functions. 2 regcomp.c - TRE POSIX compatible regex compilation functions.
3 3
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5 All rights reserved. 5 All rights reserved.
6 6
7 Redistribution and use in source and binary forms, with or without 7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions 8 modification, are permitted provided that the following conditions
9 are met: 9 are met:
10 10
(...skipping 30 matching lines...) Expand all
41 #include <assert.h> 41 #include <assert.h>
42 42
43 /*********************************************************************** 43 /***********************************************************************
44 from tre-compile.h 44 from tre-compile.h
45 ***********************************************************************/ 45 ***********************************************************************/
46 46
47 typedef struct { 47 typedef struct {
48 int position; 48 int position;
49 int code_min; 49 int code_min;
50 int code_max; 50 int code_max;
51 int *tags; 51 int* tags;
52 int assertions; 52 int assertions;
53 tre_ctype_t class; 53 tre_ctype_t class;
54 tre_ctype_t *neg_classes; 54 tre_ctype_t* neg_classes;
55 int backref; 55 int backref;
56 } tre_pos_and_tags_t; 56 } tre_pos_and_tags_t;
57 57
58
59 /*********************************************************************** 58 /***********************************************************************
60 from tre-ast.c and tre-ast.h 59 from tre-ast.c and tre-ast.h
61 ***********************************************************************/ 60 ***********************************************************************/
62 61
63 /* The different AST node types. */ 62 /* The different AST node types. */
64 typedef enum { 63 typedef enum { LITERAL, CATENATION, ITERATION, UNION } tre_ast_type_t;
65 LITERAL,
66 CATENATION,
67 ITERATION,
68 UNION
69 } tre_ast_type_t;
70 64
71 /* Special subtypes of TRE_LITERAL. */ 65 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY» -1 /* Empty leaf (denotes empty string). */ 66 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */ 67 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG» -3 /* Tag leaf. */ 68 #define TAG -3 /* Tag leaf. */
75 #define BACKREF» -4 /* Back reference leaf. */ 69 #define BACKREF -4 /* Back reference leaf. */
76 70
77 #define IS_SPECIAL(x)» ((x)->code_min < 0) 71 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x)» ((x)->code_min == EMPTY) 72 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) 73 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x)» ((x)->code_min == TAG) 74 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x)» ((x)->code_min == BACKREF) 75 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
82
83 76
84 /* A generic AST node. All AST nodes consist of this node on the top 77 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */ 78 level with `obj' pointing to the actual content. */
86 typedef struct { 79 typedef struct {
87 tre_ast_type_t type; /* Type of the node. */ 80 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */ 81 void* obj; /* Pointer to actual node. */
89 int nullable; 82 int nullable;
90 int submatch_id; 83 int submatch_id;
91 int num_submatches; 84 int num_submatches;
92 int num_tags; 85 int num_tags;
93 tre_pos_and_tags_t *firstpos; 86 tre_pos_and_tags_t* firstpos;
94 tre_pos_and_tags_t *lastpos; 87 tre_pos_and_tags_t* lastpos;
95 } tre_ast_node_t; 88 } tre_ast_node_t;
96 89
97
98 /* A "literal" node. These are created for assertions, back references, 90 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one 91 tags, matching parameter settings, and all expressions that match one
100 character. */ 92 character. */
101 typedef struct { 93 typedef struct {
102 long code_min; 94 long code_min;
103 long code_max; 95 long code_max;
104 int position; 96 int position;
105 tre_ctype_t class; 97 tre_ctype_t class;
106 tre_ctype_t *neg_classes; 98 tre_ctype_t* neg_classes;
107 } tre_literal_t; 99 } tre_literal_t;
108 100
109 /* A "catenation" node. These are created when two regexps are concatenated. 101 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part 102 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression 103 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */ 104 (catenation is left associative). */
113 typedef struct { 105 typedef struct {
114 tre_ast_node_t *left; 106 tre_ast_node_t* left;
115 tre_ast_node_t *right; 107 tre_ast_node_t* right;
116 } tre_catenation_t; 108 } tre_catenation_t;
117 109
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" 110 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
119 operators. */ 111 operators. */
120 typedef struct { 112 typedef struct {
121 /* Subexpression to match. */ 113 /* Subexpression to match. */
122 tre_ast_node_t *arg; 114 tre_ast_node_t* arg;
123 /* Minimum number of consecutive matches. */ 115 /* Minimum number of consecutive matches. */
124 int min; 116 int min;
125 /* Maximum number of consecutive matches. */ 117 /* Maximum number of consecutive matches. */
126 int max; 118 int max;
127 /* If 0, match as many characters as possible, if 1 match as few as 119 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as 120 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */ 121 matching as many/few repetitions as possible. */
130 unsigned int minimal:1; 122 unsigned int minimal : 1;
131 } tre_iteration_t; 123 } tre_iteration_t;
132 124
133 /* An "union" node. These are created for the "|" operator. */ 125 /* An "union" node. These are created for the "|" operator. */
134 typedef struct { 126 typedef struct {
135 tre_ast_node_t *left; 127 tre_ast_node_t* left;
136 tre_ast_node_t *right; 128 tre_ast_node_t* right;
137 } tre_union_t; 129 } tre_union_t;
138 130
139 131 static tre_ast_node_t* tre_ast_new_node(tre_mem_t mem, int type, void* obj) {
140 static tre_ast_node_t * 132 tre_ast_node_t* node = tre_mem_calloc(mem, sizeof *node);
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj) 133 if (!node || !obj)
142 { 134 return 0;
143 » tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node); 135 node->obj = obj;
144 » if (!node || !obj) 136 node->type = type;
145 » » return 0; 137 node->nullable = -1;
146 » node->obj = obj; 138 node->submatch_id = -1;
147 » node->type = type; 139 return node;
148 » node->nullable = -1;
149 » node->submatch_id = -1;
150 » return node;
151 } 140 }
152 141
153 static tre_ast_node_t * 142 static tre_ast_node_t* tre_ast_new_literal(tre_mem_t mem,
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position) 143 int code_min,
155 { 144 int code_max,
156 » tre_ast_node_t *node; 145 int position) {
157 » tre_literal_t *lit; 146 tre_ast_node_t* node;
147 tre_literal_t* lit;
158 148
159 » lit = tre_mem_calloc(mem, sizeof *lit); 149 lit = tre_mem_calloc(mem, sizeof *lit);
160 » node = tre_ast_new_node(mem, LITERAL, lit); 150 node = tre_ast_new_node(mem, LITERAL, lit);
161 » if (!node) 151 if (!node)
162 » » return 0; 152 return 0;
163 » lit->code_min = code_min; 153 lit->code_min = code_min;
164 » lit->code_max = code_max; 154 lit->code_max = code_max;
165 » lit->position = position; 155 lit->position = position;
166 » return node; 156 return node;
167 } 157 }
168 158
169 static tre_ast_node_t * 159 static tre_ast_node_t* tre_ast_new_iter(tre_mem_t mem,
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minim al) 160 tre_ast_node_t* arg,
171 { 161 int min,
172 » tre_ast_node_t *node; 162 int max,
173 » tre_iteration_t *iter; 163 int minimal) {
164 tre_ast_node_t* node;
165 tre_iteration_t* iter;
174 166
175 » iter = tre_mem_calloc(mem, sizeof *iter); 167 iter = tre_mem_calloc(mem, sizeof *iter);
176 » node = tre_ast_new_node(mem, ITERATION, iter); 168 node = tre_ast_new_node(mem, ITERATION, iter);
177 » if (!node) 169 if (!node)
178 » » return 0; 170 return 0;
179 » iter->arg = arg; 171 iter->arg = arg;
180 » iter->min = min; 172 iter->min = min;
181 » iter->max = max; 173 iter->max = max;
182 » iter->minimal = minimal; 174 iter->minimal = minimal;
183 » node->num_submatches = arg->num_submatches; 175 node->num_submatches = arg->num_submatches;
184 » return node; 176 return node;
185 } 177 }
186 178
187 static tre_ast_node_t * 179 static tre_ast_node_t* tre_ast_new_union(tre_mem_t mem,
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right) 180 tre_ast_node_t* left,
189 { 181 tre_ast_node_t* right) {
190 » tre_ast_node_t *node; 182 tre_ast_node_t* node;
191 » tre_union_t *un; 183 tre_union_t* un;
192 184
193 » if (!left) 185 if (!left)
194 » » return right; 186 return right;
195 » un = tre_mem_calloc(mem, sizeof *un); 187 un = tre_mem_calloc(mem, sizeof *un);
196 » node = tre_ast_new_node(mem, UNION, un); 188 node = tre_ast_new_node(mem, UNION, un);
197 » if (!node || !right) 189 if (!node || !right)
198 » » return 0; 190 return 0;
199 » un->left = left; 191 un->left = left;
200 » un->right = right; 192 un->right = right;
201 » node->num_submatches = left->num_submatches + right->num_submatches; 193 node->num_submatches = left->num_submatches + right->num_submatches;
202 » return node; 194 return node;
203 } 195 }
204 196
205 static tre_ast_node_t * 197 static tre_ast_node_t* tre_ast_new_catenation(tre_mem_t mem,
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *righ t) 198 tre_ast_node_t* left,
207 { 199 tre_ast_node_t* right) {
208 » tre_ast_node_t *node; 200 tre_ast_node_t* node;
209 » tre_catenation_t *cat; 201 tre_catenation_t* cat;
210 202
211 » if (!left) 203 if (!left)
212 » » return right; 204 return right;
213 » cat = tre_mem_calloc(mem, sizeof *cat); 205 cat = tre_mem_calloc(mem, sizeof *cat);
214 » node = tre_ast_new_node(mem, CATENATION, cat); 206 node = tre_ast_new_node(mem, CATENATION, cat);
215 » if (!node) 207 if (!node)
216 » » return 0; 208 return 0;
217 » cat->left = left; 209 cat->left = left;
218 » cat->right = right; 210 cat->right = right;
219 » node->num_submatches = left->num_submatches + right->num_submatches; 211 node->num_submatches = left->num_submatches + right->num_submatches;
220 » return node; 212 return node;
221 } 213 }
222 214
223
224 /*********************************************************************** 215 /***********************************************************************
225 from tre-stack.c and tre-stack.h 216 from tre-stack.c and tre-stack.h
226 ***********************************************************************/ 217 ***********************************************************************/
227 218
228 typedef struct tre_stack_rec tre_stack_t; 219 typedef struct tre_stack_rec tre_stack_t;
229 220
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size' 221 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be 222 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack 223 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */ 224 object or NULL if out of memory. */
234 static tre_stack_t * 225 static tre_stack_t* tre_stack_new(int size, int max_size, int increment);
235 tre_stack_new(int size, int max_size, int increment);
236 226
237 /* Frees the stack object. */ 227 /* Frees the stack object. */
238 static void 228 static void tre_stack_destroy(tre_stack_t* s);
239 tre_stack_destroy(tre_stack_t *s);
240 229
241 /* Returns the current number of objects in the stack. */ 230 /* Returns the current number of objects in the stack. */
242 static int 231 static int tre_stack_num_objects(tre_stack_t* s);
243 tre_stack_num_objects(tre_stack_t *s);
244 232
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes 233 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory. 234 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size 235 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */ 236 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type)» » » » » \ 237 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value) 238 static reg_errcode_t tre_stack_push_##typetag(tre_stack_t* s, type value)
251 239
252 declare_pushf(voidptr, void *); 240 declare_pushf(voidptr, void*);
253 declare_pushf(int, int); 241 declare_pushf(int, int);
254 242
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost 243 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be 244 element off of stack `s' and returns it. The stack must not be
257 empty. */ 245 empty. */
258 #define declare_popf(typetag, type)» » \ 246 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s) 247 static type tre_stack_pop_##typetag(tre_stack_t* s)
260 248
261 declare_popf(voidptr, void *); 249 declare_popf(voidptr, void*);
262 declare_popf(int, int); 250 declare_popf(int, int);
263 251
264 /* Just to save some typing. */ 252 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value)» » » » » \ 253 #define STACK_PUSH(s, typetag, value) \
266 do» » » » » » » » » \ 254 do { \
267 {» » » » » » » » » \ 255 status = tre_stack_push_##typetag(s, value); \
268 status = tre_stack_push_ ## typetag(s, value);» » » \ 256 } while (/*CONSTCOND*/ 0)
269 }» » » » » » » » » \
270 while (/*CONSTCOND*/0)
271 257
272 #define STACK_PUSHX(s, typetag, value)» » » » » \ 258 #define STACK_PUSHX(s, typetag, value) \
273 {» » » » » » » » » \ 259 { \
274 status = tre_stack_push_ ## typetag(s, value);» » » \ 260 status = tre_stack_push_##typetag(s, value); \
275 if (status != REG_OK)» » » » » » \ 261 if (status != REG_OK) \
276 break;» » » » » » » » \ 262 break; \
277 } 263 }
278 264
279 #define STACK_PUSHR(s, typetag, value)» » » » » \ 265 #define STACK_PUSHR(s, typetag, value) \
280 {» » » » » » » » » \ 266 { \
281 reg_errcode_t _status;» » » » » » \ 267 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value);» » » \ 268 _status = tre_stack_push_##typetag(s, value); \
283 if (_status != REG_OK)» » » » » » \ 269 if (_status != REG_OK) \
284 return _status;» » » » » » » \ 270 return _status; \
285 } 271 }
286 272
287 union tre_stack_item { 273 union tre_stack_item {
288 void *voidptr_value; 274 void* voidptr_value;
289 int int_value; 275 int int_value;
290 }; 276 };
291 277
292 struct tre_stack_rec { 278 struct tre_stack_rec {
293 int size; 279 int size;
294 int max_size; 280 int max_size;
295 int increment; 281 int increment;
296 int ptr; 282 int ptr;
297 union tre_stack_item *stack; 283 union tre_stack_item* stack;
298 }; 284 };
299 285
300 286 static tre_stack_t* tre_stack_new(int size, int max_size, int increment) {
301 static tre_stack_t * 287 tre_stack_t* s;
302 tre_stack_new(int size, int max_size, int increment)
303 {
304 tre_stack_t *s;
305 288
306 s = xmalloc(sizeof(*s)); 289 s = xmalloc(sizeof(*s));
307 if (s != NULL) 290 if (s != NULL) {
308 { 291 s->stack = xmalloc(sizeof(*s->stack) * size);
309 s->stack = xmalloc(sizeof(*s->stack) * size); 292 if (s->stack == NULL) {
310 if (s->stack == NULL) 293 xfree(s);
311 » { 294 return NULL;
312 » xfree(s);
313 » return NULL;
314 » }
315 s->size = size;
316 s->max_size = max_size;
317 s->increment = increment;
318 s->ptr = 0;
319 } 295 }
296 s->size = size;
297 s->max_size = max_size;
298 s->increment = increment;
299 s->ptr = 0;
300 }
320 return s; 301 return s;
321 } 302 }
322 303
323 static void 304 static void tre_stack_destroy(tre_stack_t* s) {
324 tre_stack_destroy(tre_stack_t *s)
325 {
326 xfree(s->stack); 305 xfree(s->stack);
327 xfree(s); 306 xfree(s);
328 } 307 }
329 308
330 static int 309 static int tre_stack_num_objects(tre_stack_t* s) {
331 tre_stack_num_objects(tre_stack_t *s)
332 {
333 return s->ptr; 310 return s->ptr;
334 } 311 }
335 312
336 static reg_errcode_t 313 static reg_errcode_t tre_stack_push(tre_stack_t* s,
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value) 314 union tre_stack_item value) {
338 { 315 if (s->ptr < s->size) {
339 if (s->ptr < s->size) 316 s->stack[s->ptr] = value;
340 { 317 s->ptr++;
341 s->stack[s->ptr] = value; 318 } else {
342 s->ptr++; 319 if (s->size >= s->max_size) {
320 return REG_ESPACE;
321 } else {
322 union tre_stack_item* new_buffer;
323 int new_size;
324 new_size = s->size + s->increment;
325 if (new_size > s->max_size)
326 new_size = s->max_size;
327 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
328 if (new_buffer == NULL) {
329 return REG_ESPACE;
330 }
331 assert(new_size > s->size);
332 s->size = new_size;
333 s->stack = new_buffer;
334 tre_stack_push(s, value);
343 } 335 }
344 else 336 }
345 {
346 if (s->size >= s->max_size)
347 » {
348 » return REG_ESPACE;
349 » }
350 else
351 » {
352 » union tre_stack_item *new_buffer;
353 » int new_size;
354 » new_size = s->size + s->increment;
355 » if (new_size > s->max_size)
356 » new_size = s->max_size;
357 » new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 » if (new_buffer == NULL)
359 » {
360 » return REG_ESPACE;
361 » }
362 » assert(new_size > s->size);
363 » s->size = new_size;
364 » s->stack = new_buffer;
365 » tre_stack_push(s, value);
366 » }
367 }
368 return REG_OK; 337 return REG_OK;
369 } 338 }
370 339
371 #define define_pushf(typetag, type) \ 340 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \ 341 declare_pushf(typetag, type) { \
373 union tre_stack_item item;» \ 342 union tre_stack_item item; \
374 item.typetag ## _value = value; \ 343 item.typetag##_value = value; \
375 return tre_stack_push(s, item); \ 344 return tre_stack_push(s, item); \
376 }
377
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
380
381 #define define_popf(typetag, type)» » \
382 declare_popf(typetag, type) {»» » \
383 return s->stack[--s->ptr].typetag ## _value; \
384 } 345 }
385 346
386 define_popf(int, int) 347 define_pushf(int, int) define_pushf(voidptr, void*)
387 define_popf(voidptr, void *) 348 #define define_popf(typetag, type) \
349 declare_popf(typetag, type) { return s->stack[--s->ptr].typetag##_value; }
388 350
351 define_popf(int, int) define_popf(voidptr, void*)
389 352
390 /*********************************************************************** 353 /***********************************************************************
391 from tre-parse.c and tre-parse.h 354 from tre-parse.c and tre-parse.h
392 ***********************************************************************/ 355 ***********************************************************************/
393 356
394 /* Parse context. */ 357 /* Parse context. */
395 typedef struct { 358 typedef struct {
396 » /* Memory allocator. The AST is allocated using this. */ 359 /* Memory allocator. The AST is allocated using this. */
397 » tre_mem_t mem; 360 tre_mem_t mem;
398 » /* Stack used for keeping track of regexp syntax. */ 361 /* Stack used for keeping track of regexp syntax. */
399 » tre_stack_t *stack; 362 tre_stack_t* stack;
400 » /* The parsed node after a parse function returns. */ 363 /* The parsed node after a parse function returns. */
401 » tre_ast_node_t *n; 364 tre_ast_node_t* n;
402 » /* Position in the regexp pattern after a parse function returns. */ 365 /* Position in the regexp pattern after a parse function returns. */
403 » const char *s; 366 const char* s;
404 » /* The first character of the regexp. */ 367 /* The first character of the regexp. */
405 » const char *re; 368 const char* re;
406 » /* Current submatch ID. */ 369 /* Current submatch ID. */
407 » int submatch_id; 370 int submatch_id;
408 » /* Current position (number of literal). */ 371 /* Current position (number of literal). */
409 » int position; 372 int position;
410 » /* The highest back reference or -1 if none seen so far. */ 373 /* The highest back reference or -1 if none seen so far. */
411 » int max_backref; 374 int max_backref;
412 » /* Compilation flags. */ 375 /* Compilation flags. */
413 » int cflags; 376 int cflags;
414 } tre_parse_ctx_t; 377 } tre_parse_ctx_t;
415 378
416 /* Some macros for expanding \w, \s, etc. */ 379 /* Some macros for expanding \w, \s, etc. */
417 static const struct { 380 static const struct {
418 » char c; 381 char c;
419 » const char *expansion; 382 const char* expansion;
420 } tre_macros[] = { 383 } tre_macros[] = {{'t', "\t"},
421 » {'t', "\t"}, {'n', "\n"}, {'r', "\r"}, 384 {'n', "\n"},
422 » {'f', "\f"}, {'a', "\a"}, {'e', "\033"}, 385 {'r', "\r"},
423 » {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"}, 386 {'f', "\f"},
424 » {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"}, 387 {'a', "\a"},
425 » { 0, 0 } 388 {'e', "\033"},
426 }; 389 {'w', "[[:alnum:]_]"},
390 {'W', "[^[:alnum:]_]"},
391 {'s', "[[:space:]]"},
392 {'S', "[^[:space:]]"},
393 {'d', "[[:digit:]]"},
394 {'D', "[^[:digit:]]"},
395 {0, 0}};
427 396
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which 397 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there 398 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */ 399 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s) 400 static const char* tre_expand_macro(const char* s) {
432 { 401 int i;
433 » int i; 402 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++)
434 » for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++); 403 ;
435 » return tre_macros[i].expansion; 404 return tre_macros[i].expansion;
436 } 405 }
437 406
438 static int 407 static int tre_compare_lit(const void* a, const void* b) {
439 tre_compare_lit(const void *a, const void *b) 408 const tre_literal_t* const* la = a;
440 { 409 const tre_literal_t* const* lb = b;
441 » const tre_literal_t *const *la = a; 410 /* assumes the range of valid code_min is < INT_MAX */
442 » const tre_literal_t *const *lb = b; 411 return la[0]->code_min - lb[0]->code_min;
443 » /* assumes the range of valid code_min is < INT_MAX */
444 » return la[0]->code_min - lb[0]->code_min;
445 } 412 }
446 413
447 struct literals { 414 struct literals {
448 » tre_mem_t mem; 415 tre_mem_t mem;
449 » tre_literal_t **a; 416 tre_literal_t** a;
450 » int len; 417 int len;
451 » int cap; 418 int cap;
452 }; 419 };
453 420
454 static tre_literal_t *tre_new_lit(struct literals *p) 421 static tre_literal_t* tre_new_lit(struct literals* p) {
455 { 422 tre_literal_t** a;
456 » tre_literal_t **a; 423 if (p->len >= p->cap) {
457 » if (p->len >= p->cap) { 424 if (p->cap >= 1 << 15)
458 » » if (p->cap >= 1<<15) 425 return 0;
459 » » » return 0; 426 p->cap *= 2;
460 » » p->cap *= 2; 427 a = xrealloc(p->a, p->cap * sizeof *p->a);
461 » » a = xrealloc(p->a, p->cap * sizeof *p->a); 428 if (!a)
462 » » if (!a) 429 return 0;
463 » » » return 0; 430 p->a = a;
464 » » p->a = a; 431 }
465 » } 432 a = p->a + p->len++;
466 » a = p->a + p->len++; 433 *a = tre_mem_calloc(p->mem, sizeof **a);
467 » *a = tre_mem_calloc(p->mem, sizeof **a); 434 return *a;
468 » return *a;
469 } 435 }
470 436
471 static int add_icase_literals(struct literals *ls, int min, int max) 437 static int add_icase_literals(struct literals* ls, int min, int max) {
472 { 438 tre_literal_t* lit;
473 » tre_literal_t *lit; 439 int b, e, c;
474 » int b, e, c; 440 for (c = min; c <= max;) {
475 » for (c=min; c<=max; ) { 441 /* assumes islower(c) and isupper(c) are exclusive
476 » » /* assumes islower(c) and isupper(c) are exclusive 442 and toupper(c)!=c if islower(c).
477 » » and toupper(c)!=c if islower(c). 443 multiple opposite case characters are not supported */
478 » » multiple opposite case characters are not supported */ 444 if (tre_islower(c)) {
479 » » if (tre_islower(c)) { 445 b = e = tre_toupper(c);
480 » » » b = e = tre_toupper(c); 446 for (c++, e++; c <= max; c++, e++)
481 » » » for (c++, e++; c<=max; c++, e++) 447 if (tre_toupper(c) != e)
482 » » » » if (tre_toupper(c) != e) break; 448 break;
483 » » } else if (tre_isupper(c)) { 449 } else if (tre_isupper(c)) {
484 » » » b = e = tre_tolower(c); 450 b = e = tre_tolower(c);
485 » » » for (c++, e++; c<=max; c++, e++) 451 for (c++, e++; c <= max; c++, e++)
486 » » » » if (tre_tolower(c) != e) break; 452 if (tre_tolower(c) != e)
487 » » } else { 453 break;
488 » » » c++; 454 } else {
489 » » » continue; 455 c++;
490 » » } 456 continue;
491 » » lit = tre_new_lit(ls); 457 }
492 » » if (!lit) 458 lit = tre_new_lit(ls);
493 » » » return -1; 459 if (!lit)
494 » » lit->code_min = b; 460 return -1;
495 » » lit->code_max = e-1; 461 lit->code_min = b;
496 » » lit->position = -1; 462 lit->code_max = e - 1;
497 » } 463 lit->position = -1;
498 » return 0; 464 }
465 return 0;
499 } 466 }
500 467
501
502 /* Maximum number of character classes in a negated bracket expression. */ 468 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64 469 #define MAX_NEG_CLASSES 64
504 470
505 struct neg { 471 struct neg {
506 » int negate; 472 int negate;
507 » int len; 473 int len;
508 » tre_ctype_t a[MAX_NEG_CLASSES]; 474 tre_ctype_t a[MAX_NEG_CLASSES];
509 }; 475 };
510 476
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges 477 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
512 478
513 /* 479 /*
514 bracket grammar: 480 bracket grammar:
515 Bracket = '[' List ']' | '[^' List ']' 481 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term 482 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass 483 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-' 484 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single 485 Char = Coll | coll_single
520 Meta = ']' | '-' 486 Meta = ']' | '-'
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]' 487 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]' 488 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]' 489 Chclass = '[:' class ':]'
524 490
525 coll_single is a single char collating element but it can be 491 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and 492 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and 493 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '[' 494 '^' anywhere except after the openning '['
529 */ 495 */
530 496
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, st ruct literals *ls, struct neg *neg) 497 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t* ctx,
532 { 498 const char* s,
533 » const char *start = s; 499 struct literals* ls,
534 » tre_ctype_t class; 500 struct neg* neg) {
535 » int min, max; 501 const char* start = s;
536 » wchar_t wc; 502 tre_ctype_t class;
537 » int len; 503 int min, max;
538 504 wchar_t wc;
539 » for (;;) { 505 int len;
540 » » class = 0; 506
541 » » len = mbtowc(&wc, s, -1); 507 for (;;) {
542 » » if (len <= 0) 508 class = 0;
543 » » » return *s ? REG_BADPAT : REG_EBRACK; 509 len = mbtowc(&wc, s, -1);
544 » » if (*s == ']' && s != start) { 510 if (len <= 0)
545 » » » ctx->s = s+1; 511 return *s ? REG_BADPAT : REG_EBRACK;
546 » » » return REG_OK; 512 if (*s == ']' && s != start) {
547 » » } 513 ctx->s = s + 1;
548 » » if (*s == '-' && s != start && s[1] != ']' && 514 return REG_OK;
549 » » /* extension: [a-z--@] is accepted as [a-z]|[--@] */ 515 }
550 » » (s[1] != '-' || s[2] == ']')) 516 if (*s == '-' && s != start && s[1] != ']' &&
551 » » » return REG_ERANGE; 517 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
552 » » if (*s == '[' && (s[1] == '.' || s[1] == '=')) 518 (s[1] != '-' || s[2] == ']'))
553 » » » /* collating symbols and equivalence classes are not sup ported */ 519 return REG_ERANGE;
554 » » » return REG_ECOLLATE; 520 if (*s == '[' && (s[1] == '.' || s[1] == '='))
555 » » if (*s == '[' && s[1] == ':') { 521 /* collating symbols and equivalence classes are not supported */
556 » » » char tmp[CHARCLASS_NAME_MAX+1]; 522 return REG_ECOLLATE;
557 » » » s += 2; 523 if (*s == '[' && s[1] == ':') {
558 » » » for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) { 524 char tmp[CHARCLASS_NAME_MAX + 1];
559 » » » » if (s[len] == ':') { 525 s += 2;
560 » » » » » memcpy(tmp, s, len); 526 for (len = 0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
561 » » » » » tmp[len] = 0; 527 if (s[len] == ':') {
562 » » » » » class = tre_ctype(tmp); 528 memcpy(tmp, s, len);
563 » » » » » break; 529 tmp[len] = 0;
564 » » » » } 530 class = tre_ctype(tmp);
565 » » » } 531 break;
566 » » » if (!class || s[len+1] != ']') 532 }
567 » » » » return REG_ECTYPE; 533 }
568 » » » min = 0; 534 if (!class || s[len + 1] != ']')
569 » » » max = TRE_CHAR_MAX; 535 return REG_ECTYPE;
570 » » » s += len+2; 536 min = 0;
571 » » } else { 537 max = TRE_CHAR_MAX;
572 » » » min = max = wc; 538 s += len + 2;
573 » » » s += len; 539 } else {
574 » » » if (*s == '-' && s[1] != ']') { 540 min = max = wc;
575 » » » » s++; 541 s += len;
576 » » » » len = mbtowc(&wc, s, -1); 542 if (*s == '-' && s[1] != ']') {
577 » » » » max = wc; 543 s++;
578 » » » » /* XXX - Should use collation order instead of 544 len = mbtowc(&wc, s, -1);
579 » » » » encoding values in character ranges. */ 545 max = wc;
580 » » » » if (len <= 0 || min > max) 546 /* XXX - Should use collation order instead of
581 » » » » » return REG_ERANGE; 547 encoding values in character ranges. */
582 » » » » s += len; 548 if (len <= 0 || min > max)
583 » » » } 549 return REG_ERANGE;
584 » » } 550 s += len;
585 551 }
586 » » if (class && neg->negate) { 552 }
587 » » » if (neg->len >= MAX_NEG_CLASSES) 553
588 » » » » return REG_ESPACE; 554 if (class && neg->negate) {
589 » » » neg->a[neg->len++] = class; 555 if (neg->len >= MAX_NEG_CLASSES)
590 » » } else { 556 return REG_ESPACE;
591 » » » tre_literal_t *lit = tre_new_lit(ls); 557 neg->a[neg->len++] = class;
592 » » » if (!lit) 558 } else {
593 » » » » return REG_ESPACE; 559 tre_literal_t* lit = tre_new_lit(ls);
594 » » » lit->code_min = min; 560 if (!lit)
595 » » » lit->code_max = max; 561 return REG_ESPACE;
596 » » » lit->class = class; 562 lit->code_min = min;
597 » » » lit->position = -1; 563 lit->code_max = max;
598 564 lit->class = class;
599 » » » /* Add opposite-case codepoints if REG_ICASE is present. 565 lit->position = -1;
600 » » » It seems that POSIX requires that bracket negation 566
601 » » » should happen before case-folding, but most practical 567 /* Add opposite-case codepoints if REG_ICASE is present.
602 » » » implementations do it the other way around. Changing 568 It seems that POSIX requires that bracket negation
603 » » » the order would need efficient representation of 569 should happen before case-folding, but most practical
604 » » » case-fold ranges and bracket range sets even with 570 implementations do it the other way around. Changing
605 » » » simple patterns so this is ok for now. */ 571 the order would need efficient representation of
606 » » » if (ctx->cflags & REG_ICASE && !class) 572 case-fold ranges and bracket range sets even with
607 » » » » if (add_icase_literals(ls, min, max)) 573 simple patterns so this is ok for now. */
608 » » » » » return REG_ESPACE; 574 if (ctx->cflags & REG_ICASE && !class)
609 » » } 575 if (add_icase_literals(ls, min, max))
610 » } 576 return REG_ESPACE;
611 } 577 }
612 578 }
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s) 579 }
614 { 580
615 » int i, max, min, negmax, negmin; 581 static reg_errcode_t parse_bracket(tre_parse_ctx_t* ctx, const char* s) {
616 » tre_ast_node_t *node = 0, *n; 582 int i, max, min, negmax, negmin;
617 » tre_ctype_t *nc = 0; 583 tre_ast_node_t *node = 0, *n;
618 » tre_literal_t *lit; 584 tre_ctype_t* nc = 0;
619 » struct literals ls; 585 tre_literal_t* lit;
620 » struct neg neg; 586 struct literals ls;
621 » reg_errcode_t err; 587 struct neg neg;
622 588 reg_errcode_t err;
623 » ls.mem = ctx->mem; 589
624 » ls.len = 0; 590 ls.mem = ctx->mem;
625 » ls.cap = 32; 591 ls.len = 0;
626 » ls.a = xmalloc(ls.cap * sizeof *ls.a); 592 ls.cap = 32;
627 » if (!ls.a) 593 ls.a = xmalloc(ls.cap * sizeof *ls.a);
628 » » return REG_ESPACE; 594 if (!ls.a)
629 » neg.len = 0; 595 return REG_ESPACE;
630 » neg.negate = *s == '^'; 596 neg.len = 0;
631 » if (neg.negate) 597 neg.negate = *s == '^';
632 » » s++; 598 if (neg.negate)
633 599 s++;
634 » err = parse_bracket_terms(ctx, s, &ls, &neg); 600
635 » if (err != REG_OK) 601 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 » » goto parse_bracket_done; 602 if (err != REG_OK)
637 603 goto parse_bracket_done;
638 » if (neg.negate) { 604
639 » » /* Sort the array if we need to negate it. */ 605 if (neg.negate) {
640 » » qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit); 606 /* Sort the array if we need to negate it. */
641 » » /* extra lit for the last negated range */ 607 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
642 » » lit = tre_new_lit(&ls); 608 /* extra lit for the last negated range */
643 » » if (!lit) { 609 lit = tre_new_lit(&ls);
644 » » » err = REG_ESPACE; 610 if (!lit) {
645 » » » goto parse_bracket_done; 611 err = REG_ESPACE;
646 » » } 612 goto parse_bracket_done;
647 » » lit->code_min = TRE_CHAR_MAX+1; 613 }
648 » » lit->code_max = TRE_CHAR_MAX+1; 614 lit->code_min = TRE_CHAR_MAX + 1;
649 » » lit->position = -1; 615 lit->code_max = TRE_CHAR_MAX + 1;
650 » » /* negated classes */ 616 lit->position = -1;
651 » » if (neg.len) { 617 /* negated classes */
652 » » » nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a); 618 if (neg.len) {
653 » » » if (!nc) { 619 nc = tre_mem_alloc(ctx->mem, (neg.len + 1) * sizeof *neg.a);
654 » » » » err = REG_ESPACE; 620 if (!nc) {
655 » » » » goto parse_bracket_done; 621 err = REG_ESPACE;
656 » » » } 622 goto parse_bracket_done;
657 » » » memcpy(nc, neg.a, neg.len*sizeof *neg.a); 623 }
658 » » » nc[neg.len] = 0; 624 memcpy(nc, neg.a, neg.len * sizeof *neg.a);
659 » » } 625 nc[neg.len] = 0;
660 » } 626 }
661 627 }
662 » /* Build a union of the items in the array, negated if necessary. */ 628
663 » negmax = negmin = 0; 629 /* Build a union of the items in the array, negated if necessary. */
664 » for (i = 0; i < ls.len; i++) { 630 negmax = negmin = 0;
665 » » lit = ls.a[i]; 631 for (i = 0; i < ls.len; i++) {
666 » » min = lit->code_min; 632 lit = ls.a[i];
667 » » max = lit->code_max; 633 min = lit->code_min;
668 » » if (neg.negate) { 634 max = lit->code_max;
669 » » » if (min <= negmin) { 635 if (neg.negate) {
670 » » » » /* Overlap. */ 636 if (min <= negmin) {
671 » » » » negmin = MAX(max + 1, negmin); 637 /* Overlap. */
672 » » » » continue; 638 negmin = MAX(max + 1, negmin);
673 » » » } 639 continue;
674 » » » negmax = min - 1; 640 }
675 » » » lit->code_min = negmin; 641 negmax = min - 1;
676 » » » lit->code_max = negmax; 642 lit->code_min = negmin;
677 » » » negmin = max + 1; 643 lit->code_max = negmax;
678 » » } 644 negmin = max + 1;
679 » » lit->position = ctx->position; 645 }
680 » » lit->neg_classes = nc; 646 lit->position = ctx->position;
681 » » n = tre_ast_new_node(ctx->mem, LITERAL, lit); 647 lit->neg_classes = nc;
682 » » node = tre_ast_new_union(ctx->mem, node, n); 648 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
683 » » if (!node) { 649 node = tre_ast_new_union(ctx->mem, node, n);
684 » » » err = REG_ESPACE; 650 if (!node) {
685 » » » break; 651 err = REG_ESPACE;
686 » » } 652 break;
687 » } 653 }
654 }
688 655
689 parse_bracket_done: 656 parse_bracket_done:
690 » xfree(ls.a); 657 xfree(ls.a);
691 » ctx->position++; 658 ctx->position++;
692 » ctx->n = node; 659 ctx->n = node;
693 » return err; 660 return err;
694 } 661 }
695 662
696 static const char *parse_dup_count(const char *s, int *n) 663 static const char* parse_dup_count(const char* s, int* n) {
697 { 664 *n = -1;
698 » *n = -1; 665 if (!isdigit(*s))
699 » if (!isdigit(*s)) 666 return s;
700 » » return s; 667 *n = 0;
701 » *n = 0; 668 for (;;) {
702 » for (;;) { 669 *n = 10 * *n + (*s - '0');
703 » » *n = 10 * *n + (*s - '0'); 670 s++;
704 » » s++; 671 if (!isdigit(*s) || *n > RE_DUP_MAX)
705 » » if (!isdigit(*s) || *n > RE_DUP_MAX) 672 break;
706 » » » break; 673 }
707 » } 674 return s;
708 » return s; 675 }
709 } 676
710 677 static const char* parse_dup(const char* s, int ere, int* pmin, int* pmax) {
711 static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax) 678 int min, max;
712 { 679
713 » int min, max; 680 s = parse_dup_count(s, &min);
714 681 if (*s == ',')
715 » s = parse_dup_count(s, &min); 682 s = parse_dup_count(s + 1, &max);
716 » if (*s == ',') 683 else
717 » » s = parse_dup_count(s+1, &max); 684 max = min;
718 » else 685
719 » » max = min; 686 if ((max < min && max >= 0) || max > RE_DUP_MAX || min > RE_DUP_MAX ||
720 687 min < 0 || (!ere && *s++ != '\\') || *s++ != '}')
721 » if ( 688 return 0;
722 » » (max < min && max >= 0) || 689 *pmin = min;
723 » » max > RE_DUP_MAX || 690 *pmax = max;
724 » » min > RE_DUP_MAX || 691 return s;
725 » » min < 0 || 692 }
726 » » (!ere && *s++ != '\\') || 693
727 » » *s++ != '}' 694 static int hexval(unsigned c) {
728 » ) 695 if (c - '0' < 10)
729 » » return 0; 696 return c - '0';
730 » *pmin = min; 697 c |= 32;
731 » *pmax = max; 698 if (c - 'a' < 6)
732 » return s; 699 return c - 'a' + 10;
733 } 700 return -1;
734 701 }
735 static int hexval(unsigned c) 702
736 { 703 static reg_errcode_t marksub(tre_parse_ctx_t* ctx,
737 » if (c-'0'<10) return c-'0'; 704 tre_ast_node_t* node,
738 » c |= 32; 705 int subid) {
739 » if (c-'a'<6) return c-'a'+10; 706 if (node->submatch_id >= 0) {
740 » return -1; 707 tre_ast_node_t* n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
741 } 708 if (!n)
742 709 return REG_ESPACE;
743 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int sub id) 710 n = tre_ast_new_catenation(ctx->mem, n, node);
744 { 711 if (!n)
745 » if (node->submatch_id >= 0) { 712 return REG_ESPACE;
746 » » tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1) ; 713 n->num_submatches = node->num_submatches;
747 » » if (!n) 714 node = n;
748 » » » return REG_ESPACE; 715 }
749 » » n = tre_ast_new_catenation(ctx->mem, n, node); 716 node->submatch_id = subid;
750 » » if (!n) 717 node->num_submatches++;
751 » » » return REG_ESPACE; 718 ctx->n = node;
752 » » n->num_submatches = node->num_submatches; 719 return REG_OK;
753 » » node = n;
754 » }
755 » node->submatch_id = subid;
756 » node->num_submatches++;
757 » ctx->n = node;
758 » return REG_OK;
759 } 720 }
760 721
761 /* 722 /*
762 BRE grammar: 723 BRE grammar:
763 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$' 724 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^'
725 Branch '$'
764 Branch = Atom | Branch Atom 726 Branch = Atom | Branch Atom
765 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref 727 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch
766 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}' 728 '\)' | back_ref
729 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count
730 '\}'
767 731
768 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well) 732 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
769 733
770 ERE grammar: 734 ERE grammar:
771 Regex = Branch | Regex '|' Branch 735 Regex = Branch | Regex '|' Branch
772 Branch = Atom | Branch Atom 736 Branch = Atom | Branch Atom
773 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ') ' | '^' | '$' 737 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex
774 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}' 738 ')' | '^' | '$'
739 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count
740 ',' Count '}'
775 741
776 (a*+?, ^*, $+, \X, {, (|a) are unspecified) 742 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
777 */ 743 */
778 744
779 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s) 745 static reg_errcode_t parse_atom(tre_parse_ctx_t* ctx, const char* s) {
780 { 746 int len, ere = ctx->cflags & REG_EXTENDED;
781 » int len, ere = ctx->cflags & REG_EXTENDED; 747 const char* p;
782 » const char *p; 748 tre_ast_node_t* node;
783 » tre_ast_node_t *node; 749 wchar_t wc;
784 » wchar_t wc; 750 switch (*s) {
785 » switch (*s) { 751 case '[':
786 » case '[': 752 return parse_bracket(ctx, s + 1);
787 » » return parse_bracket(ctx, s+1); 753 case '\\':
788 » case '\\': 754 p = tre_expand_macro(s + 1);
789 » » p = tre_expand_macro(s+1); 755 if (p) {
790 » » if (p) { 756 /* assume \X expansion is a single atom */
791 » » » /* assume \X expansion is a single atom */ 757 reg_errcode_t err = parse_atom(ctx, p);
792 » » » reg_errcode_t err = parse_atom(ctx, p); 758 ctx->s = s + 2;
793 » » » ctx->s = s+2; 759 return err;
794 » » » return err; 760 }
795 » » } 761 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
796 » » /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */ 762 switch (*++s) {
797 » » switch (*++s) { 763 case 0:
798 » » case 0: 764 return REG_EESCAPE;
799 » » » return REG_EESCAPE; 765 case 'b':
800 » » case 'b': 766 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
801 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_WB, -1); 767 break;
802 » » » break; 768 case 'B':
803 » » case 'B': 769 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
804 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_WB_NEG, -1); 770 break;
805 » » » break; 771 case '<':
806 » » case '<': 772 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
807 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_BOW, -1); 773 break;
808 » » » break; 774 case '>':
809 » » case '>': 775 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
810 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_EOW, -1); 776 break;
811 » » » break; 777 case 'x':
812 » » case 'x': 778 s++;
813 » » » s++; 779 int i, v = 0, c;
814 » » » int i, v = 0, c; 780 len = 2;
815 » » » len = 2; 781 if (*s == '{') {
816 » » » if (*s == '{') { 782 len = 8;
817 » » » » len = 8; 783 s++;
818 » » » » s++; 784 }
819 » » » } 785 for (i = 0; i < len && v < 0x110000; i++) {
820 » » » for (i=0; i<len && v<0x110000; i++) { 786 c = hexval(s[i]);
821 » » » » c = hexval(s[i]); 787 if (c < 0)
822 » » » » if (c < 0) break; 788 break;
823 » » » » v = 16*v + c; 789 v = 16 * v + c;
824 » » » } 790 }
825 » » » s += i; 791 s += i;
826 » » » if (len == 8) { 792 if (len == 8) {
827 » » » » if (*s != '}') 793 if (*s != '}')
828 » » » » » return REG_EBRACE; 794 return REG_EBRACE;
829 » » » » s++; 795 s++;
830 » » » } 796 }
831 » » » node = tre_ast_new_literal(ctx->mem, v, v, ctx->position ++); 797 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
832 » » » s--; 798 s--;
833 » » » break; 799 break;
834 » » case '{': 800 case '{':
835 » » case '+': 801 case '+':
836 » » case '?': 802 case '?':
837 » » » /* extension: treat \+, \? as repetitions in BRE */ 803 /* extension: treat \+, \? as repetitions in BRE */
838 » » » /* reject repetitions after empty expression in BRE */ 804 /* reject repetitions after empty expression in BRE */
839 » » » if (!ere) 805 if (!ere)
840 » » » » return REG_BADRPT; 806 return REG_BADRPT;
841 » » case '|': 807 case '|':
842 » » » /* extension: treat \| as alternation in BRE */ 808 /* extension: treat \| as alternation in BRE */
843 » » » if (!ere) { 809 if (!ere) {
844 » » » » node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 810 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
845 » » » » s--; 811 s--;
846 » » » » goto end; 812 goto end;
847 » » » } 813 }
848 » » » /* fallthrough */ 814 /* fallthrough */
849 » » default: 815 default:
850 » » » if (!ere && (unsigned)*s-'1' < 9) { 816 if (!ere && (unsigned)*s - '1' < 9) {
851 » » » » /* back reference */ 817 /* back reference */
852 » » » » int val = *s - '0'; 818 int val = *s - '0';
853 » » » » node = tre_ast_new_literal(ctx->mem, BACKREF, va l, ctx->position++); 819 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
854 » » » » ctx->max_backref = MAX(val, ctx->max_backref); 820 ctx->max_backref = MAX(val, ctx->max_backref);
855 » » » } else { 821 } else {
856 » » » » /* extension: accept unknown escaped char 822 /* extension: accept unknown escaped char
857 » » » » as a literal */ 823 as a literal */
858 » » » » goto parse_literal; 824 goto parse_literal;
859 » » » } 825 }
860 » » } 826 }
861 » » s++; 827 s++;
862 » » break; 828 break;
863 » case '.': 829 case '.':
864 » » if (ctx->cflags & REG_NEWLINE) { 830 if (ctx->cflags & REG_NEWLINE) {
865 » » » tre_ast_node_t *tmp1, *tmp2; 831 tre_ast_node_t *tmp1, *tmp2;
866 » » » tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos ition++); 832 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n' - 1, ctx->position++);
867 » » » tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA X, ctx->position++); 833 tmp2 = tre_ast_new_literal(ctx->mem, '\n' + 1, TRE_CHAR_MAX,
868 » » » if (tmp1 && tmp2) 834 ctx->position++);
869 » » » » node = tre_ast_new_union(ctx->mem, tmp1, tmp2); 835 if (tmp1 && tmp2)
870 » » » else 836 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
871 » » » » node = 0; 837 else
872 » » } else { 838 node = 0;
873 » » » node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ct x->position++); 839 } else {
874 » » } 840 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
875 » » s++; 841 }
876 » » break; 842 s++;
877 » case '^': 843 break;
878 » » /* '^' has a special meaning everywhere in EREs, and at beginnin g of BRE. */ 844 case '^':
879 » » if (!ere && s != ctx->re) 845 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE.
880 » » » goto parse_literal; 846 */
881 » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, - 1); 847 if (!ere && s != ctx->re)
882 » » s++; 848 goto parse_literal;
883 » » break; 849 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
884 » case '$': 850 s++;
885 » » /* '$' is special everywhere in EREs, and in the end of the stri ng in BREs. */ 851 break;
886 » » if (!ere && s[1]) 852 case '$':
887 » » » goto parse_literal; 853 /* '$' is special everywhere in EREs, and in the end of the string in
888 » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, - 1); 854 * BREs. */
889 » » s++; 855 if (!ere && s[1])
890 » » break; 856 goto parse_literal;
891 » case '*': 857 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
892 » » return REG_BADPAT; 858 s++;
893 » case '{': 859 break;
894 » case '+': 860 case '*':
895 » case '?': 861 return REG_BADPAT;
896 » » /* reject repetitions after empty expression in ERE */ 862 case '{':
897 » » if (ere) 863 case '+':
898 » » » return REG_BADRPT; 864 case '?':
899 » case '|': 865 /* reject repetitions after empty expression in ERE */
900 » » if (!ere) 866 if (ere)
901 » » » goto parse_literal; 867 return REG_BADRPT;
902 » case 0: 868 case '|':
903 » » node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 869 if (!ere)
904 » » break; 870 goto parse_literal;
905 » default: 871 case 0:
906 parse_literal: 872 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
907 » » len = mbtowc(&wc, s, -1); 873 break;
908 » » if (len < 0) 874 default:
909 » » » return REG_BADPAT; 875 parse_literal:
910 » » if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(w c))) { 876 len = mbtowc(&wc, s, -1);
911 » » » tre_ast_node_t *tmp1, *tmp2; 877 if (len < 0)
912 » » » /* multiple opposite case characters are not supported * / 878 return REG_BADPAT;
913 » » » tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr e_toupper(wc), ctx->position); 879 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
914 » » » tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr e_tolower(wc), ctx->position); 880 tre_ast_node_t *tmp1, *tmp2;
915 » » » if (tmp1 && tmp2) 881 /* multiple opposite case characters are not supported */
916 » » » » node = tre_ast_new_union(ctx->mem, tmp1, tmp2); 882 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc),
917 » » » else 883 ctx->position);
918 » » » » node = 0; 884 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc),
919 » » } else { 885 ctx->position);
920 » » » node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi on); 886 if (tmp1 && tmp2)
921 » » } 887 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
922 » » ctx->position++; 888 else
923 » » s += len; 889 node = 0;
924 » » break; 890 } else {
925 » } 891 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
892 }
893 ctx->position++;
894 s += len;
895 break;
896 }
926 end: 897 end:
927 » if (!node) 898 if (!node)
928 » » return REG_ESPACE; 899 return REG_ESPACE;
929 » ctx->n = node; 900 ctx->n = node;
930 » ctx->s = s; 901 ctx->s = s;
931 » return REG_OK; 902 return REG_OK;
932 } 903 }
933 904
934 #define PUSHPTR(err, s, v) do { \ 905 #define PUSHPTR(err, s, v) \
935 » if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \ 906 do { \
936 » » return err; \ 907 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
937 } while(0) 908 return err; \
938 909 } while (0)
939 #define PUSHINT(err, s, v) do { \ 910
940 » if ((err = tre_stack_push_int(s, v)) != REG_OK) \ 911 #define PUSHINT(err, s, v) \
941 » » return err; \ 912 do { \
942 } while(0) 913 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
943 914 return err; \
944 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) 915 } while (0)
945 { 916
946 » tre_ast_node_t *nbranch=0, *nunion=0; 917 static reg_errcode_t tre_parse(tre_parse_ctx_t* ctx) {
947 » int ere = ctx->cflags & REG_EXTENDED; 918 tre_ast_node_t *nbranch = 0, *nunion = 0;
948 » const char *s = ctx->re; 919 int ere = ctx->cflags & REG_EXTENDED;
949 » int subid = 0; 920 const char* s = ctx->re;
950 » int depth = 0; 921 int subid = 0;
951 » reg_errcode_t err; 922 int depth = 0;
952 » tre_stack_t *stack = ctx->stack; 923 reg_errcode_t err;
953 924 tre_stack_t* stack = ctx->stack;
954 » PUSHINT(err, stack, subid++); 925
955 » for (;;) { 926 PUSHINT(err, stack, subid++);
956 » » if ((!ere && *s == '\\' && s[1] == '(') || 927 for (;;) {
957 » » (ere && *s == '(')) { 928 if ((!ere && *s == '\\' && s[1] == '(') || (ere && *s == '(')) {
958 » » » PUSHPTR(err, stack, nunion); 929 PUSHPTR(err, stack, nunion);
959 » » » PUSHPTR(err, stack, nbranch); 930 PUSHPTR(err, stack, nbranch);
960 » » » PUSHINT(err, stack, subid++); 931 PUSHINT(err, stack, subid++);
961 » » » s++; 932 s++;
962 » » » if (!ere) 933 if (!ere)
963 » » » » s++; 934 s++;
964 » » » depth++; 935 depth++;
965 » » » nbranch = nunion = 0; 936 nbranch = nunion = 0;
966 » » » continue; 937 continue;
967 » » } 938 }
968 » » if ((!ere && *s == '\\' && s[1] == ')') || 939 if ((!ere && *s == '\\' && s[1] == ')') || (ere && *s == ')' && depth)) {
969 » » (ere && *s == ')' && depth)) { 940 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
970 » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 941 if (!ctx->n)
971 » » » if (!ctx->n) 942 return REG_ESPACE;
972 » » » » return REG_ESPACE; 943 } else {
973 » » } else { 944 err = parse_atom(ctx, s);
974 » » » err = parse_atom(ctx, s); 945 if (err != REG_OK)
975 » » » if (err != REG_OK) 946 return err;
976 » » » » return err; 947 s = ctx->s;
977 » » » s = ctx->s; 948 }
978 » » } 949
979 950 parse_iter:
980 » parse_iter: 951 /* extension: repetitions are rejected after an empty node
981 » » /* extension: repetitions are rejected after an empty node 952 eg. (+), |*, {2}, but assertions are not treated as empty
982 » » eg. (+), |*, {2}, but assertions are not treated as empty 953 so ^* or $? are accepted currently. */
983 » » so ^* or $? are accepted currently. */ 954 for (;;) {
984 » » for (;;) { 955 int min, max;
985 » » » int min, max; 956
986 957 if (*s != '\\' && *s != '*') {
987 » » » if (*s!='\\' && *s!='*') { 958 if (!ere)
988 » » » » if (!ere) 959 break;
989 » » » » » break; 960 if (*s != '+' && *s != '?' && *s != '{')
990 » » » » if (*s!='+' && *s!='?' && *s!='{') 961 break;
991 » » » » » break; 962 }
992 » » » } 963 if (*s == '\\' && ere)
993 » » » if (*s=='\\' && ere) 964 break;
994 » » » » break; 965 /* extension: treat \+, \? as repetitions in BRE */
995 » » » /* extension: treat \+, \? as repetitions in BRE */ 966 if (*s == '\\' && s[1] != '+' && s[1] != '?' && s[1] != '{')
996 » » » if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{') 967 break;
997 » » » » break; 968 if (*s == '\\')
998 » » » if (*s=='\\') 969 s++;
999 » » » » s++; 970
1000 971 /* extension: multiple consecutive *+?{,} is unspecified,
1001 » » » /* extension: multiple consecutive *+?{,} is unspecified , 972 but (a+)+ has to be supported so accepting a++ makes
1002 » » » but (a+)+ has to be supported so accepting a++ makes 973 sense, note however that the RE_DUP_MAX limit can be
1003 » » » sense, note however that the RE_DUP_MAX limit can be 974 circumvented: (a{255}){255} uses a lot of memory.. */
1004 » » » circumvented: (a{255}){255} uses a lot of memory.. */ 975 if (*s == '{') {
1005 » » » if (*s=='{') { 976 s = parse_dup(s + 1, ere, &min, &max);
1006 » » » » s = parse_dup(s+1, ere, &min, &max); 977 if (!s)
1007 » » » » if (!s) 978 return REG_BADBR;
1008 » » » » » return REG_BADBR; 979 } else {
1009 » » » } else { 980 min = 0;
1010 » » » » min=0; 981 max = -1;
1011 » » » » max=-1; 982 if (*s == '+')
1012 » » » » if (*s == '+') 983 min = 1;
1013 » » » » » min = 1; 984 if (*s == '?')
1014 » » » » if (*s == '?') 985 max = 1;
1015 » » » » » max = 1; 986 s++;
1016 » » » » s++; 987 }
1017 » » » } 988 if (max == 0)
1018 » » » if (max == 0) 989 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1019 » » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1 , -1); 990 else
1020 » » » else 991 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1021 » » » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0); 992 if (!ctx->n)
1022 » » » if (!ctx->n) 993 return REG_ESPACE;
1023 » » » » return REG_ESPACE; 994 }
1024 » » } 995
1025 996 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1026 » » nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); 997 if ((ere && *s == '|') || (ere && *s == ')' && depth) ||
1027 » » if ((ere && *s == '|') || 998 (!ere && *s == '\\' && s[1] == ')') ||
1028 » » (ere && *s == ')' && depth) || 999 /* extension: treat \| as alternation in BRE */
1029 » » (!ere && *s == '\\' && s[1] == ')') || 1000 (!ere && *s == '\\' && s[1] == '|') || !*s) {
1030 » » /* extension: treat \| as alternation in BRE */ 1001 /* extension: empty branch is unspecified (), (|a), (a|)
1031 » » (!ere && *s == '\\' && s[1] == '|') || 1002 here they are not rejected but match on empty string */
1032 » » !*s) { 1003 int c = *s;
1033 » » » /* extension: empty branch is unspecified (), (|a), (a|) 1004 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1034 » » » here they are not rejected but match on empty string */ 1005 nbranch = 0;
1035 » » » int c = *s; 1006
1036 » » » nunion = tre_ast_new_union(ctx->mem, nunion, nbranch); 1007 if (c == '\\' && s[1] == '|') {
1037 » » » nbranch = 0; 1008 s += 2;
1038 1009 } else if (c == '|') {
1039 » » » if (c == '\\' && s[1] == '|') { 1010 s++;
1040 » » » » s+=2; 1011 } else {
1041 » » » } else if (c == '|') { 1012 if (c == '\\') {
1042 » » » » s++; 1013 if (!depth)
1043 » » » } else { 1014 return REG_EPAREN;
1044 » » » » if (c == '\\') { 1015 s += 2;
1045 » » » » » if (!depth) return REG_EPAREN; 1016 } else if (c == ')')
1046 » » » » » s+=2; 1017 s++;
1047 » » » » } else if (c == ')') 1018 depth--;
1048 » » » » » s++; 1019 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1049 » » » » depth--; 1020 if (err != REG_OK)
1050 » » » » err = marksub(ctx, nunion, tre_stack_pop_int(sta ck)); 1021 return err;
1051 » » » » if (err != REG_OK) 1022 if (!c && depth < 0) {
1052 » » » » » return err; 1023 ctx->submatch_id = subid;
1053 » » » » if (!c && depth<0) { 1024 return REG_OK;
1054 » » » » » ctx->submatch_id = subid; 1025 }
1055 » » » » » return REG_OK; 1026 if (!c || depth < 0)
1056 » » » » } 1027 return REG_EPAREN;
1057 » » » » if (!c || depth<0) 1028 nbranch = tre_stack_pop_voidptr(stack);
1058 » » » » » return REG_EPAREN; 1029 nunion = tre_stack_pop_voidptr(stack);
1059 » » » » nbranch = tre_stack_pop_voidptr(stack); 1030 goto parse_iter;
1060 » » » » nunion = tre_stack_pop_voidptr(stack); 1031 }
1061 » » » » goto parse_iter; 1032 }
1062 » » » } 1033 }
1063 » » } 1034 }
1064 » }
1065 }
1066
1067 1035
1068 /*********************************************************************** 1036 /***********************************************************************
1069 from tre-compile.c 1037 from tre-compile.c
1070 ***********************************************************************/ 1038 ***********************************************************************/
1071 1039
1072
1073 /* 1040 /*
1074 TODO: 1041 TODO:
1075 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive 1042 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1076 function calls. 1043 function calls.
1077 */ 1044 */
1078 1045
1079 /* 1046 /*
1080 Algorithms to setup tags so that submatch addressing can be done. 1047 Algorithms to setup tags so that submatch addressing can be done.
1081 */ 1048 */
1082 1049
1083
1084 /* Inserts a catenation node to the root of the tree given in `node'. 1050 /* Inserts a catenation node to the root of the tree given in `node'.
1085 As the left child a new tag with number `tag_id' to `node' is added, 1051 As the left child a new tag with number `tag_id' to `node' is added,
1086 and the right child is the old root. */ 1052 and the right child is the old root. */
1087 static reg_errcode_t 1053 static reg_errcode_t tre_add_tag_left(tre_mem_t mem,
1088 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id) 1054 tre_ast_node_t* node,
1089 { 1055 int tag_id) {
1090 tre_catenation_t *c; 1056 tre_catenation_t* c;
1091 1057
1092 c = tre_mem_alloc(mem, sizeof(*c)); 1058 c = tre_mem_alloc(mem, sizeof(*c));
1093 if (c == NULL) 1059 if (c == NULL)
1094 return REG_ESPACE; 1060 return REG_ESPACE;
1095 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); 1061 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1096 if (c->left == NULL) 1062 if (c->left == NULL)
1097 return REG_ESPACE; 1063 return REG_ESPACE;
1098 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); 1064 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1099 if (c->right == NULL) 1065 if (c->right == NULL)
1100 return REG_ESPACE; 1066 return REG_ESPACE;
1101 1067
1102 c->right->obj = node->obj; 1068 c->right->obj = node->obj;
1103 c->right->type = node->type; 1069 c->right->type = node->type;
1104 c->right->nullable = -1; 1070 c->right->nullable = -1;
1105 c->right->submatch_id = -1; 1071 c->right->submatch_id = -1;
1106 c->right->firstpos = NULL; 1072 c->right->firstpos = NULL;
1107 c->right->lastpos = NULL; 1073 c->right->lastpos = NULL;
1108 c->right->num_tags = 0; 1074 c->right->num_tags = 0;
1109 node->obj = c; 1075 node->obj = c;
1110 node->type = CATENATION; 1076 node->type = CATENATION;
1111 return REG_OK; 1077 return REG_OK;
1112 } 1078 }
1113 1079
1114 /* Inserts a catenation node to the root of the tree given in `node'. 1080 /* Inserts a catenation node to the root of the tree given in `node'.
1115 As the right child a new tag with number `tag_id' to `node' is added, 1081 As the right child a new tag with number `tag_id' to `node' is added,
1116 and the left child is the old root. */ 1082 and the left child is the old root. */
1117 static reg_errcode_t 1083 static reg_errcode_t tre_add_tag_right(tre_mem_t mem,
1118 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id) 1084 tre_ast_node_t* node,
1119 { 1085 int tag_id) {
1120 tre_catenation_t *c; 1086 tre_catenation_t* c;
1121 1087
1122 c = tre_mem_alloc(mem, sizeof(*c)); 1088 c = tre_mem_alloc(mem, sizeof(*c));
1123 if (c == NULL) 1089 if (c == NULL)
1124 return REG_ESPACE; 1090 return REG_ESPACE;
1125 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1); 1091 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1126 if (c->right == NULL) 1092 if (c->right == NULL)
1127 return REG_ESPACE; 1093 return REG_ESPACE;
1128 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); 1094 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1129 if (c->left == NULL) 1095 if (c->left == NULL)
1130 return REG_ESPACE; 1096 return REG_ESPACE;
(...skipping 13 matching lines...) Expand all
1144 typedef enum { 1110 typedef enum {
1145 ADDTAGS_RECURSE, 1111 ADDTAGS_RECURSE,
1146 ADDTAGS_AFTER_ITERATION, 1112 ADDTAGS_AFTER_ITERATION,
1147 ADDTAGS_AFTER_UNION_LEFT, 1113 ADDTAGS_AFTER_UNION_LEFT,
1148 ADDTAGS_AFTER_UNION_RIGHT, 1114 ADDTAGS_AFTER_UNION_RIGHT,
1149 ADDTAGS_AFTER_CAT_LEFT, 1115 ADDTAGS_AFTER_CAT_LEFT,
1150 ADDTAGS_AFTER_CAT_RIGHT, 1116 ADDTAGS_AFTER_CAT_RIGHT,
1151 ADDTAGS_SET_SUBMATCH_END 1117 ADDTAGS_SET_SUBMATCH_END
1152 } tre_addtags_symbol_t; 1118 } tre_addtags_symbol_t;
1153 1119
1154
1155 typedef struct { 1120 typedef struct {
1156 int tag; 1121 int tag;
1157 int next_tag; 1122 int next_tag;
1158 } tre_tag_states_t; 1123 } tre_tag_states_t;
1159 1124
1160
1161 /* Go through `regset' and set submatch data for submatches that are 1125 /* Go through `regset' and set submatch data for submatches that are
1162 using this tag. */ 1126 using this tag. */
1163 static void 1127 static void tre_purge_regset(int* regset, tre_tnfa_t* tnfa, int tag) {
1164 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1165 {
1166 int i; 1128 int i;
1167 1129
1168 for (i = 0; regset[i] >= 0; i++) 1130 for (i = 0; regset[i] >= 0; i++) {
1169 { 1131 int id = regset[i] / 2;
1170 int id = regset[i] / 2; 1132 int start = !(regset[i] % 2);
1171 int start = !(regset[i] % 2); 1133 if (start)
1172 if (start) 1134 tnfa->submatch_data[id].so_tag = tag;
1173 » tnfa->submatch_data[id].so_tag = tag; 1135 else
1174 else 1136 tnfa->submatch_data[id].eo_tag = tag;
1175 » tnfa->submatch_data[id].eo_tag = tag; 1137 }
1176 }
1177 regset[0] = -1; 1138 regset[0] = -1;
1178 } 1139 }
1179 1140
1180
1181 /* Adds tags to appropriate locations in the parse tree in `tree', so that 1141 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1182 subexpressions marked for submatch addressing can be traced. */ 1142 subexpressions marked for submatch addressing can be traced. */
1183 static reg_errcode_t 1143 static reg_errcode_t tre_add_tags(tre_mem_t mem,
1184 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, 1144 tre_stack_t* stack,
1185 » tre_tnfa_t *tnfa) 1145 tre_ast_node_t* tree,
1186 { 1146 tre_tnfa_t* tnfa) {
1187 reg_errcode_t status = REG_OK; 1147 reg_errcode_t status = REG_OK;
1188 tre_addtags_symbol_t symbol; 1148 tre_addtags_symbol_t symbol;
1189 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */ 1149 tre_ast_node_t* node = tree; /* Tree node we are currently looking at. */
1190 int bottom = tre_stack_num_objects(stack); 1150 int bottom = tre_stack_num_objects(stack);
1191 /* True for first pass (counting number of needed tags) */ 1151 /* True for first pass (counting number of needed tags) */
1192 int first_pass = (mem == NULL || tnfa == NULL); 1152 int first_pass = (mem == NULL || tnfa == NULL);
1193 int *regset, *orig_regset; 1153 int *regset, *orig_regset;
1194 int num_tags = 0; /* Total number of tags. */ 1154 int num_tags = 0; /* Total number of tags. */
1195 int num_minimals = 0;» /* Number of special minimal tags. */ 1155 int num_minimals = 0; /* Number of special minimal tags. */
1196 int tag = 0;» /* The tag that is to be added next. */ 1156 int tag = 0; /* The tag that is to be added next. */
1197 int next_tag = 1; /* Next tag to use after this one. */ 1157 int next_tag = 1; /* Next tag to use after this one. */
1198 int *parents;» /* Stack of submatches the current submatch is 1158 int* parents; /* Stack of submatches the current submatch is
1199 » » contained in. */ 1159 contained in. */
1200 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ 1160 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1201 tre_tag_states_t *saved_states; 1161 tre_tag_states_t* saved_states;
1202 1162
1203 tre_tag_direction_t direction = TRE_TAG_MINIMIZE; 1163 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1204 if (!first_pass) 1164 if (!first_pass) {
1205 { 1165 tnfa->end_tag = 0;
1206 tnfa->end_tag = 0; 1166 tnfa->minimal_tags[0] = -1;
1207 tnfa->minimal_tags[0] = -1; 1167 }
1208 }
1209 1168
1210 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); 1169 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1211 if (regset == NULL) 1170 if (regset == NULL)
1212 return REG_ESPACE; 1171 return REG_ESPACE;
1213 regset[0] = -1; 1172 regset[0] = -1;
1214 orig_regset = regset; 1173 orig_regset = regset;
1215 1174
1216 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); 1175 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1217 if (parents == NULL) 1176 if (parents == NULL) {
1218 { 1177 xfree(regset);
1219 xfree(regset); 1178 return REG_ESPACE;
1220 return REG_ESPACE; 1179 }
1221 }
1222 parents[0] = -1; 1180 parents[0] = -1;
1223 1181
1224 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); 1182 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1225 if (saved_states == NULL) 1183 if (saved_states == NULL) {
1226 { 1184 xfree(regset);
1227 xfree(regset); 1185 xfree(parents);
1228 xfree(parents); 1186 return REG_ESPACE;
1229 return REG_ESPACE; 1187 } else {
1230 } 1188 unsigned int i;
1231 else 1189 for (i = 0; i <= tnfa->num_submatches; i++)
1232 { 1190 saved_states[i].tag = -1;
1233 unsigned int i; 1191 }
1234 for (i = 0; i <= tnfa->num_submatches; i++)
1235 » saved_states[i].tag = -1;
1236 }
1237 1192
1238 STACK_PUSH(stack, voidptr, node); 1193 STACK_PUSH(stack, voidptr, node);
1239 STACK_PUSH(stack, int, ADDTAGS_RECURSE); 1194 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1240 1195
1241 while (tre_stack_num_objects(stack) > bottom) 1196 while (tre_stack_num_objects(stack) > bottom) {
1242 { 1197 if (status != REG_OK)
1243 if (status != REG_OK) 1198 break;
1244 break; 1199
1245 1200 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1246 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack); 1201 switch (symbol) {
1247 switch (symbol) 1202 case ADDTAGS_SET_SUBMATCH_END: {
1248 { 1203 int id = tre_stack_pop_int(stack);
1249 1204 int i;
1250 case ADDTAGS_SET_SUBMATCH_END: 1205
1251 { 1206 /* Add end of this submatch to regset. */
1252 int id = tre_stack_pop_int(stack); 1207 for (i = 0; regset[i] >= 0; i++)
1253 int i; 1208 ;
1254 1209 regset[i] = id * 2 + 1;
1255 /* Add end of this submatch to regset. */ 1210 regset[i + 1] = -1;
1256 for (i = 0; regset[i] >= 0; i++); 1211
1257 regset[i] = id * 2 + 1; 1212 /* Pop this submatch from the parents stack. */
1258 regset[i + 1] = -1; 1213 for (i = 0; parents[i] >= 0; i++)
1259 1214 ;
1260 /* Pop this submatch from the parents stack. */ 1215 parents[i - 1] = -1;
1261 for (i = 0; parents[i] >= 0; i++); 1216 break;
1262 parents[i - 1] = -1; 1217 }
1263 break; 1218
1264 } 1219 case ADDTAGS_RECURSE:
1265 1220 node = tre_stack_pop_voidptr(stack);
1266 case ADDTAGS_RECURSE: 1221
1267 node = tre_stack_pop_voidptr(stack); 1222 if (node->submatch_id >= 0) {
1268 1223 int id = node->submatch_id;
1269 if (node->submatch_id >= 0) 1224 int i;
1270 { 1225
1271 int id = node->submatch_id; 1226 /* Add start of this submatch to regset. */
1272 int i; 1227 for (i = 0; regset[i] >= 0; i++)
1273 1228 ;
1274 1229 regset[i] = id * 2;
1275 /* Add start of this submatch to regset. */ 1230 regset[i + 1] = -1;
1276 for (i = 0; regset[i] >= 0; i++); 1231
1277 regset[i] = id * 2; 1232 if (!first_pass) {
1278 regset[i + 1] = -1; 1233 for (i = 0; parents[i] >= 0; i++)
1279 1234 ;
1280 if (!first_pass) 1235 tnfa->submatch_data[id].parents = NULL;
1281 { 1236 if (i > 0) {
1282 for (i = 0; parents[i] >= 0; i++); 1237 int* p = xmalloc(sizeof(*p) * (i + 1));
1283 tnfa->submatch_data[id].parents = NULL; 1238 if (p == NULL) {
1284 if (i > 0) 1239 status = REG_ESPACE;
1285 { 1240 break;
1286 int *p = xmalloc(sizeof(*p) * (i + 1)); 1241 }
1287 if (p == NULL) 1242 assert(tnfa->submatch_data[id].parents == NULL);
1288 { 1243 tnfa->submatch_data[id].parents = p;
1289 status = REG_ESPACE; 1244 for (i = 0; parents[i] >= 0; i++)
1290 break; 1245 p[i] = parents[i];
1291 } 1246 p[i] = -1;
1292 assert(tnfa->submatch_data[id].parents == NULL); 1247 }
1293 tnfa->submatch_data[id].parents = p; 1248 }
1294 for (i = 0; parents[i] >= 0; i++) 1249
1295 p[i] = parents[i]; 1250 /* Add end of this submatch to regset after processing this
1296 p[i] = -1; 1251 node. */
1297 } 1252 STACK_PUSHX(stack, int, node->submatch_id);
1298 } 1253 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1299 1254 }
1300 /* Add end of this submatch to regset after processing this 1255
1301 node. */ 1256 switch (node->type) {
1302 STACK_PUSHX(stack, int, node->submatch_id); 1257 case LITERAL: {
1303 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); 1258 tre_literal_t* lit = node->obj;
1304 } 1259
1305 1260 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) {
1306 switch (node->type) 1261 int i;
1307 { 1262 if (regset[0] >= 0) {
1308 case LITERAL: 1263 /* Regset is not empty, so add a tag before the
1309 { 1264 literal or backref. */
1310 tre_literal_t *lit = node->obj; 1265 if (!first_pass) {
1311 1266 status = tre_add_tag_left(mem, node, tag);
1312 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 1267 tnfa->tag_directions[tag] = direction;
1313 { 1268 if (minimal_tag >= 0) {
1314 int i; 1269 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1315 if (regset[0] >= 0) 1270 ;
1316 { 1271 tnfa->minimal_tags[i] = tag;
1317 /* Regset is not empty, so add a tag before the 1272 tnfa->minimal_tags[i + 1] = minimal_tag;
1318 literal or backref. */ 1273 tnfa->minimal_tags[i + 2] = -1;
1319 if (!first_pass) 1274 minimal_tag = -1;
1320 { 1275 num_minimals++;
1321 status = tre_add_tag_left(mem, node, tag); 1276 }
1322 tnfa->tag_directions[tag] = direction; 1277 tre_purge_regset(regset, tnfa, tag);
1323 if (minimal_tag >= 0) 1278 } else {
1324 { 1279 node->num_tags = 1;
1325 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1280 }
1326 tnfa->minimal_tags[i] = tag; 1281
1327 tnfa->minimal_tags[i + 1] = minimal_tag; 1282 regset[0] = -1;
1328 tnfa->minimal_tags[i + 2] = -1; 1283 tag = next_tag;
1329 minimal_tag = -1; 1284 num_tags++;
1330 num_minimals++; 1285 next_tag++;
1331 } 1286 }
1332 tre_purge_regset(regset, tnfa, tag); 1287 } else {
1333 } 1288 assert(!IS_TAG(lit));
1334 else 1289 }
1335 { 1290 break;
1336 node->num_tags = 1; 1291 }
1337 } 1292 case CATENATION: {
1338 1293 tre_catenation_t* cat = node->obj;
1339 regset[0] = -1; 1294 tre_ast_node_t* left = cat->left;
1340 tag = next_tag; 1295 tre_ast_node_t* right = cat->right;
1341 num_tags++; 1296 int reserved_tag = -1;
1342 next_tag++; 1297
1343 } 1298 /* After processing right child. */
1344 } 1299 STACK_PUSHX(stack, voidptr, node);
1345 else 1300 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1346 { 1301
1347 assert(!IS_TAG(lit)); 1302 /* Process right child. */
1348 } 1303 STACK_PUSHX(stack, voidptr, right);
1349 break; 1304 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1350 } 1305
1351 case CATENATION: 1306 /* After processing left child. */
1352 { 1307 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1353 tre_catenation_t *cat = node->obj; 1308 if (left->num_tags > 0 && right->num_tags > 0) {
1354 tre_ast_node_t *left = cat->left; 1309 /* Reserve the next tag to the right child. */
1355 tre_ast_node_t *right = cat->right; 1310 reserved_tag = next_tag;
1356 int reserved_tag = -1; 1311 next_tag++;
1357 1312 }
1358 1313 STACK_PUSHX(stack, int, reserved_tag);
1359 /* After processing right child. */ 1314 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1360 STACK_PUSHX(stack, voidptr, node); 1315
1361 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); 1316 /* Process left child. */
1362 1317 STACK_PUSHX(stack, voidptr, left);
1363 /* Process right child. */ 1318 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1364 STACK_PUSHX(stack, voidptr, right); 1319
1365 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1320 } break;
1366 1321 case ITERATION: {
1367 /* After processing left child. */ 1322 tre_iteration_t* iter = node->obj;
1368 STACK_PUSHX(stack, int, next_tag + left->num_tags); 1323
1369 if (left->num_tags > 0 && right->num_tags > 0) 1324 if (first_pass) {
1370 { 1325 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1371 /* Reserve the next tag to the right child. */ 1326 } else {
1372 reserved_tag = next_tag; 1327 STACK_PUSHX(stack, int, tag);
1373 next_tag++; 1328 STACK_PUSHX(stack, int, iter->minimal);
1374 } 1329 }
1375 STACK_PUSHX(stack, int, reserved_tag); 1330 STACK_PUSHX(stack, voidptr, node);
1376 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); 1331 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1377 1332
1378 /* Process left child. */ 1333 STACK_PUSHX(stack, voidptr, iter->arg);
1379 STACK_PUSHX(stack, voidptr, left); 1334 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1380 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1335
1381 1336 /* Regset is not empty, so add a tag here. */
1382 } 1337 if (regset[0] >= 0 || iter->minimal) {
1383 break; 1338 if (!first_pass) {
1384 case ITERATION: 1339 int i;
1385 { 1340 status = tre_add_tag_left(mem, node, tag);
1386 tre_iteration_t *iter = node->obj; 1341 if (iter->minimal)
1387 1342 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1388 if (first_pass) 1343 else
1389 { 1344 tnfa->tag_directions[tag] = direction;
1390 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); 1345 if (minimal_tag >= 0) {
1391 } 1346 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1392 else 1347 ;
1393 { 1348 tnfa->minimal_tags[i] = tag;
1394 STACK_PUSHX(stack, int, tag); 1349 tnfa->minimal_tags[i + 1] = minimal_tag;
1395 STACK_PUSHX(stack, int, iter->minimal); 1350 tnfa->minimal_tags[i + 2] = -1;
1396 } 1351 minimal_tag = -1;
1397 STACK_PUSHX(stack, voidptr, node); 1352 num_minimals++;
1398 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); 1353 }
1399 1354 tre_purge_regset(regset, tnfa, tag);
1400 STACK_PUSHX(stack, voidptr, iter->arg); 1355 }
1401 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1356
1402 1357 regset[0] = -1;
1403 /* Regset is not empty, so add a tag here. */ 1358 tag = next_tag;
1404 if (regset[0] >= 0 || iter->minimal) 1359 num_tags++;
1405 { 1360 next_tag++;
1406 if (!first_pass) 1361 }
1407 { 1362 direction = TRE_TAG_MINIMIZE;
1408 int i; 1363 } break;
1409 status = tre_add_tag_left(mem, node, tag); 1364 case UNION: {
1410 if (iter->minimal) 1365 tre_union_t* uni = node->obj;
1411 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; 1366 tre_ast_node_t* left = uni->left;
1412 else 1367 tre_ast_node_t* right = uni->right;
1413 tnfa->tag_directions[tag] = direction; 1368 int left_tag;
1414 if (minimal_tag >= 0) 1369 int right_tag;
1415 { 1370
1416 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1371 if (regset[0] >= 0) {
1417 tnfa->minimal_tags[i] = tag; 1372 left_tag = next_tag;
1418 tnfa->minimal_tags[i + 1] = minimal_tag; 1373 right_tag = next_tag + 1;
1419 tnfa->minimal_tags[i + 2] = -1; 1374 } else {
1420 minimal_tag = -1; 1375 left_tag = tag;
1421 num_minimals++; 1376 right_tag = next_tag;
1422 } 1377 }
1423 tre_purge_regset(regset, tnfa, tag); 1378
1424 } 1379 /* After processing right child. */
1425 1380 STACK_PUSHX(stack, int, right_tag);
1426 regset[0] = -1; 1381 STACK_PUSHX(stack, int, left_tag);
1427 tag = next_tag; 1382 STACK_PUSHX(stack, voidptr, regset);
1428 num_tags++; 1383 STACK_PUSHX(stack, int, regset[0] >= 0);
1429 next_tag++; 1384 STACK_PUSHX(stack, voidptr, node);
1430 } 1385 STACK_PUSHX(stack, voidptr, right);
1431 direction = TRE_TAG_MINIMIZE; 1386 STACK_PUSHX(stack, voidptr, left);
1432 } 1387 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1433 break; 1388
1434 case UNION: 1389 /* Process right child. */
1435 { 1390 STACK_PUSHX(stack, voidptr, right);
1436 tre_union_t *uni = node->obj; 1391 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1437 tre_ast_node_t *left = uni->left; 1392
1438 tre_ast_node_t *right = uni->right; 1393 /* After processing left child. */
1439 int left_tag; 1394 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1440 int right_tag; 1395
1441 1396 /* Process left child. */
1442 if (regset[0] >= 0) 1397 STACK_PUSHX(stack, voidptr, left);
1443 { 1398 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1444 left_tag = next_tag; 1399
1445 right_tag = next_tag + 1; 1400 /* Regset is not empty, so add a tag here. */
1446 } 1401 if (regset[0] >= 0) {
1447 else 1402 if (!first_pass) {
1448 { 1403 int i;
1449 left_tag = tag; 1404 status = tre_add_tag_left(mem, node, tag);
1450 right_tag = next_tag; 1405 tnfa->tag_directions[tag] = direction;
1451 } 1406 if (minimal_tag >= 0) {
1452 1407 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1453 /* After processing right child. */ 1408 ;
1454 STACK_PUSHX(stack, int, right_tag); 1409 tnfa->minimal_tags[i] = tag;
1455 STACK_PUSHX(stack, int, left_tag); 1410 tnfa->minimal_tags[i + 1] = minimal_tag;
1456 STACK_PUSHX(stack, voidptr, regset); 1411 tnfa->minimal_tags[i + 2] = -1;
1457 STACK_PUSHX(stack, int, regset[0] >= 0); 1412 minimal_tag = -1;
1458 STACK_PUSHX(stack, voidptr, node); 1413 num_minimals++;
1459 STACK_PUSHX(stack, voidptr, right); 1414 }
1460 STACK_PUSHX(stack, voidptr, left); 1415 tre_purge_regset(regset, tnfa, tag);
1461 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); 1416 }
1462 1417
1463 /* Process right child. */ 1418 regset[0] = -1;
1464 STACK_PUSHX(stack, voidptr, right); 1419 tag = next_tag;
1465 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1420 num_tags++;
1466 1421 next_tag++;
1467 /* After processing left child. */ 1422 }
1468 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); 1423
1469 1424 if (node->num_submatches > 0) {
1470 /* Process left child. */ 1425 /* The next two tags are reserved for markers. */
1471 STACK_PUSHX(stack, voidptr, left); 1426 next_tag++;
1472 STACK_PUSHX(stack, int, ADDTAGS_RECURSE); 1427 tag = next_tag;
1473 1428 next_tag++;
1474 /* Regset is not empty, so add a tag here. */ 1429 }
1475 if (regset[0] >= 0) 1430
1476 { 1431 break;
1477 if (!first_pass) 1432 }
1478 { 1433 }
1479 int i; 1434
1480 status = tre_add_tag_left(mem, node, tag); 1435 if (node->submatch_id >= 0) {
1481 tnfa->tag_directions[tag] = direction; 1436 int i;
1482 if (minimal_tag >= 0) 1437 /* Push this submatch on the parents stack. */
1483 { 1438 for (i = 0; parents[i] >= 0; i++)
1484 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1439 ;
1485 tnfa->minimal_tags[i] = tag; 1440 parents[i] = node->submatch_id;
1486 tnfa->minimal_tags[i + 1] = minimal_tag; 1441 parents[i + 1] = -1;
1487 tnfa->minimal_tags[i + 2] = -1; 1442 }
1488 minimal_tag = -1; 1443
1489 num_minimals++; 1444 break; /* end case: ADDTAGS_RECURSE */
1490 } 1445
1491 tre_purge_regset(regset, tnfa, tag); 1446 case ADDTAGS_AFTER_ITERATION: {
1492 } 1447 int minimal = 0;
1493 1448 int enter_tag;
1494 regset[0] = -1; 1449 node = tre_stack_pop_voidptr(stack);
1495 tag = next_tag; 1450 if (first_pass) {
1496 num_tags++; 1451 node->num_tags = ((tre_iteration_t*)node->obj)->arg->num_tags +
1497 next_tag++; 1452 tre_stack_pop_int(stack);
1498 } 1453 minimal_tag = -1;
1499 1454 } else {
1500 if (node->num_submatches > 0) 1455 minimal = tre_stack_pop_int(stack);
1501 { 1456 enter_tag = tre_stack_pop_int(stack);
1502 /* The next two tags are reserved for markers. */ 1457 if (minimal)
1503 next_tag++; 1458 minimal_tag = enter_tag;
1504 tag = next_tag; 1459 }
1505 next_tag++; 1460
1506 } 1461 if (!first_pass) {
1507 1462 if (minimal)
1508 break; 1463 direction = TRE_TAG_MINIMIZE;
1509 } 1464 else
1510 } 1465 direction = TRE_TAG_MAXIMIZE;
1511 1466 }
1512 if (node->submatch_id >= 0) 1467 break;
1513 { 1468 }
1514 int i; 1469
1515 /* Push this submatch on the parents stack. */ 1470 case ADDTAGS_AFTER_CAT_LEFT: {
1516 for (i = 0; parents[i] >= 0; i++); 1471 int new_tag = tre_stack_pop_int(stack);
1517 parents[i] = node->submatch_id; 1472 next_tag = tre_stack_pop_int(stack);
1518 parents[i + 1] = -1; 1473 if (new_tag >= 0) {
1519 } 1474 tag = new_tag;
1520 1475 }
1521 break; /* end case: ADDTAGS_RECURSE */ 1476 break;
1522 1477 }
1523 case ADDTAGS_AFTER_ITERATION: 1478
1524 { 1479 case ADDTAGS_AFTER_CAT_RIGHT:
1525 int minimal = 0; 1480 node = tre_stack_pop_voidptr(stack);
1526 int enter_tag; 1481 if (first_pass)
1527 node = tre_stack_pop_voidptr(stack); 1482 node->num_tags = ((tre_catenation_t*)node->obj)->left->num_tags +
1528 if (first_pass) 1483 ((tre_catenation_t*)node->obj)->right->num_tags;
1529 { 1484 break;
1530 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags 1485
1531 + tre_stack_pop_int(stack); 1486 case ADDTAGS_AFTER_UNION_LEFT:
1532 minimal_tag = -1; 1487 /* Lift the bottom of the `regset' array so that when processing
1533 } 1488 the right operand the items currently in the array are
1534 else 1489 invisible. The original bottom was saved at ADDTAGS_UNION and
1535 { 1490 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1536 minimal = tre_stack_pop_int(stack); 1491 while (*regset >= 0)
1537 enter_tag = tre_stack_pop_int(stack); 1492 regset++;
1538 if (minimal) 1493 break;
1539 minimal_tag = enter_tag; 1494
1540 } 1495 case ADDTAGS_AFTER_UNION_RIGHT: {
1541 1496 int added_tags, tag_left, tag_right;
1542 if (!first_pass) 1497 tre_ast_node_t* left = tre_stack_pop_voidptr(stack);
1543 { 1498 tre_ast_node_t* right = tre_stack_pop_voidptr(stack);
1544 if (minimal) 1499 node = tre_stack_pop_voidptr(stack);
1545 direction = TRE_TAG_MINIMIZE; 1500 added_tags = tre_stack_pop_int(stack);
1546 else 1501 if (first_pass) {
1547 direction = TRE_TAG_MAXIMIZE; 1502 node->num_tags = ((tre_union_t*)node->obj)->left->num_tags +
1548 } 1503 ((tre_union_t*)node->obj)->right->num_tags +
1549 break; 1504 added_tags + ((node->num_submatches > 0) ? 2 : 0);
1550 } 1505 }
1551 1506 regset = tre_stack_pop_voidptr(stack);
1552 case ADDTAGS_AFTER_CAT_LEFT: 1507 tag_left = tre_stack_pop_int(stack);
1553 { 1508 tag_right = tre_stack_pop_int(stack);
1554 int new_tag = tre_stack_pop_int(stack); 1509
1555 next_tag = tre_stack_pop_int(stack); 1510 /* Add tags after both children, the left child gets a smaller
1556 if (new_tag >= 0) 1511 tag than the right child. This guarantees that we prefer
1557 { 1512 the left child over the right child. */
1558 tag = new_tag; 1513 /* XXX - This is not always necessary (if the children have
1559 } 1514 tags which must be seen for every match of that child). */
1560 break; 1515 /* XXX - Check if this is the only place where tre_add_tag_right
1561 } 1516 is used. If so, use tre_add_tag_left (putting the tag before
1562 1517 the child as opposed after the child) and throw away
1563 case ADDTAGS_AFTER_CAT_RIGHT: 1518 tre_add_tag_right. */
1564 node = tre_stack_pop_voidptr(stack); 1519 if (node->num_submatches > 0) {
1565 if (first_pass) 1520 if (!first_pass) {
1566 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags 1521 status = tre_add_tag_right(mem, left, tag_left);
1567 + ((tre_catenation_t *)node->obj)->right->num_tags; 1522 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1568 break; 1523 if (status == REG_OK)
1569 1524 status = tre_add_tag_right(mem, right, tag_right);
1570 case ADDTAGS_AFTER_UNION_LEFT: 1525 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1571 /* Lift the bottom of the `regset' array so that when processing 1526 }
1572 the right operand the items currently in the array are 1527 num_tags += 2;
1573 invisible. The original bottom was saved at ADDTAGS_UNION and 1528 }
1574 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */ 1529 direction = TRE_TAG_MAXIMIZE;
1575 while (*regset >= 0) 1530 break;
1576 regset++; 1531 }
1577 break; 1532
1578 1533 default:
1579 case ADDTAGS_AFTER_UNION_RIGHT: 1534 assert(0);
1580 { 1535 break;
1581 int added_tags, tag_left, tag_right; 1536
1582 tre_ast_node_t *left = tre_stack_pop_voidptr(stack); 1537 } /* end switch(symbol) */
1583 tre_ast_node_t *right = tre_stack_pop_voidptr(stack); 1538 } /* end while(tre_stack_num_objects(stack) > bottom) */
1584 node = tre_stack_pop_voidptr(stack);
1585 added_tags = tre_stack_pop_int(stack);
1586 if (first_pass)
1587 {
1588 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1589 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1590 + ((node->num_submatches > 0) ? 2 : 0);
1591 }
1592 regset = tre_stack_pop_voidptr(stack);
1593 tag_left = tre_stack_pop_int(stack);
1594 tag_right = tre_stack_pop_int(stack);
1595
1596 /* Add tags after both children, the left child gets a smaller
1597 tag than the right child. This guarantees that we prefer
1598 the left child over the right child. */
1599 /* XXX - This is not always necessary (if the children have
1600 tags which must be seen for every match of that child). */
1601 /* XXX - Check if this is the only place where tre_add_tag_right
1602 is used. If so, use tre_add_tag_left (putting the tag before
1603 the child as opposed after the child) and throw away
1604 tre_add_tag_right. */
1605 if (node->num_submatches > 0)
1606 {
1607 if (!first_pass)
1608 {
1609 status = tre_add_tag_right(mem, left, tag_left);
1610 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1611 if (status == REG_OK)
1612 status = tre_add_tag_right(mem, right, tag_right);
1613 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1614 }
1615 num_tags += 2;
1616 }
1617 direction = TRE_TAG_MAXIMIZE;
1618 break;
1619 }
1620
1621 default:
1622 assert(0);
1623 break;
1624
1625 } /* end switch(symbol) */
1626 } /* end while(tre_stack_num_objects(stack) > bottom) */
1627 1539
1628 if (!first_pass) 1540 if (!first_pass)
1629 tre_purge_regset(regset, tnfa, tag); 1541 tre_purge_regset(regset, tnfa, tag);
1630 1542
1631 if (!first_pass && minimal_tag >= 0) 1543 if (!first_pass && minimal_tag >= 0) {
1632 { 1544 int i;
1633 int i; 1545 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1634 for (i = 0; tnfa->minimal_tags[i] >= 0; i++); 1546 ;
1635 tnfa->minimal_tags[i] = tag; 1547 tnfa->minimal_tags[i] = tag;
1636 tnfa->minimal_tags[i + 1] = minimal_tag; 1548 tnfa->minimal_tags[i + 1] = minimal_tag;
1637 tnfa->minimal_tags[i + 2] = -1; 1549 tnfa->minimal_tags[i + 2] = -1;
1638 minimal_tag = -1; 1550 minimal_tag = -1;
1639 num_minimals++; 1551 num_minimals++;
1640 } 1552 }
1641 1553
1642 assert(tree->num_tags == num_tags); 1554 assert(tree->num_tags == num_tags);
1643 tnfa->end_tag = num_tags; 1555 tnfa->end_tag = num_tags;
1644 tnfa->num_tags = num_tags; 1556 tnfa->num_tags = num_tags;
1645 tnfa->num_minimals = num_minimals; 1557 tnfa->num_minimals = num_minimals;
1646 xfree(orig_regset); 1558 xfree(orig_regset);
1647 xfree(parents); 1559 xfree(parents);
1648 xfree(saved_states); 1560 xfree(saved_states);
1649 return status; 1561 return status;
1650 } 1562 }
1651 1563
1652
1653
1654 /* 1564 /*
1655 AST to TNFA compilation routines. 1565 AST to TNFA compilation routines.
1656 */ 1566 */
1657 1567
1658 typedef enum { 1568 typedef enum { COPY_RECURSE, COPY_SET_RESULT_PTR } tre_copyast_symbol_t;
1659 COPY_RECURSE,
1660 COPY_SET_RESULT_PTR
1661 } tre_copyast_symbol_t;
1662 1569
1663 /* Flags for tre_copy_ast(). */ 1570 /* Flags for tre_copy_ast(). */
1664 #define COPY_REMOVE_TAGS» 1 1571 #define COPY_REMOVE_TAGS 1
1665 #define COPY_MAXIMIZE_FIRST_TAG» 2 1572 #define COPY_MAXIMIZE_FIRST_TAG 2
1666 1573
1667 static reg_errcode_t 1574 static reg_errcode_t tre_copy_ast(tre_mem_t mem,
1668 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, 1575 tre_stack_t* stack,
1669 » int flags, int *pos_add, tre_tag_direction_t *tag_directions, 1576 tre_ast_node_t* ast,
1670 » tre_ast_node_t **copy, int *max_pos) 1577 int flags,
1671 { 1578 int* pos_add,
1579 tre_tag_direction_t* tag_directions,
1580 tre_ast_node_t** copy,
1581 int* max_pos) {
1672 reg_errcode_t status = REG_OK; 1582 reg_errcode_t status = REG_OK;
1673 int bottom = tre_stack_num_objects(stack); 1583 int bottom = tre_stack_num_objects(stack);
1674 int num_copied = 0; 1584 int num_copied = 0;
1675 int first_tag = 1; 1585 int first_tag = 1;
1676 tre_ast_node_t **result = copy; 1586 tre_ast_node_t** result = copy;
1677 tre_copyast_symbol_t symbol; 1587 tre_copyast_symbol_t symbol;
1678 1588
1679 STACK_PUSH(stack, voidptr, ast); 1589 STACK_PUSH(stack, voidptr, ast);
1680 STACK_PUSH(stack, int, COPY_RECURSE); 1590 STACK_PUSH(stack, int, COPY_RECURSE);
1681 1591
1682 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 1592 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {
1683 { 1593 tre_ast_node_t* node;
1684 tre_ast_node_t *node; 1594 if (status != REG_OK)
1685 if (status != REG_OK) 1595 break;
1686 » break;
1687 1596
1688 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack); 1597 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1689 switch (symbol) 1598 switch (symbol) {
1690 » { 1599 case COPY_SET_RESULT_PTR:
1691 » case COPY_SET_RESULT_PTR: 1600 result = tre_stack_pop_voidptr(stack);
1692 » result = tre_stack_pop_voidptr(stack); 1601 break;
1693 » break; 1602 case COPY_RECURSE:
1694 » case COPY_RECURSE: 1603 node = tre_stack_pop_voidptr(stack);
1695 » node = tre_stack_pop_voidptr(stack); 1604 switch (node->type) {
1696 » switch (node->type) 1605 case LITERAL: {
1697 » { 1606 tre_literal_t* lit = node->obj;
1698 » case LITERAL: 1607 int pos = lit->position;
1699 » { 1608 int min = lit->code_min;
1700 » » tre_literal_t *lit = node->obj; 1609 int max = lit->code_max;
1701 » » int pos = lit->position; 1610 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) {
1702 » » int min = lit->code_min; 1611 /* XXX - e.g. [ab] has only one position but two
1703 » » int max = lit->code_max; 1612 nodes, so we are creating holes in the state space
1704 » » if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 1613 here. Not fatal, just wastes memory. */
1705 » » { 1614 pos += *pos_add;
1706 » » /* XXX - e.g. [ab] has only one position but two 1615 num_copied++;
1707 » » nodes, so we are creating holes in the state space 1616 } else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) {
1708 » » here. Not fatal, just wastes memory. */ 1617 /* Change this tag to empty. */
1709 » » pos += *pos_add; 1618 min = EMPTY;
1710 » » num_copied++; 1619 max = pos = -1;
1711 » » } 1620 } else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) &&
1712 » » else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) 1621 first_tag) {
1713 » » { 1622 /* Maximize the first tag. */
1714 » » /* Change this tag to empty. */ 1623 tag_directions[max] = TRE_TAG_MAXIMIZE;
1715 » » min = EMPTY; 1624 first_tag = 0;
1716 » » max = pos = -1; 1625 }
1717 » » } 1626 *result = tre_ast_new_literal(mem, min, max, pos);
1718 » » else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) 1627 if (*result == NULL)
1719 » » » && first_tag) 1628 status = REG_ESPACE;
1720 » » { 1629 else {
1721 » » /* Maximize the first tag. */ 1630 tre_literal_t* p = (*result)->obj;
1722 » » tag_directions[max] = TRE_TAG_MAXIMIZE; 1631 p->class = lit->class;
1723 » » first_tag = 0; 1632 p->neg_classes = lit->neg_classes;
1724 » » } 1633 }
1725 » » *result = tre_ast_new_literal(mem, min, max, pos);
1726 » » if (*result == NULL)
1727 » » status = REG_ESPACE;
1728 » » else {
1729 » » tre_literal_t *p = (*result)->obj;
1730 » » p->class = lit->class;
1731 » » p->neg_classes = lit->neg_classes;
1732 » » }
1733 1634
1734 » » if (pos > *max_pos) 1635 if (pos > *max_pos)
1735 » » *max_pos = pos; 1636 *max_pos = pos;
1736 » » break; 1637 break;
1737 » } 1638 }
1738 » case UNION: 1639 case UNION: {
1739 » { 1640 tre_union_t* uni = node->obj;
1740 » » tre_union_t *uni = node->obj; 1641 tre_union_t* tmp;
1741 » » tre_union_t *tmp; 1642 *result = tre_ast_new_union(mem, uni->left, uni->right);
1742 » » *result = tre_ast_new_union(mem, uni->left, uni->right); 1643 if (*result == NULL) {
1743 » » if (*result == NULL) 1644 status = REG_ESPACE;
1744 » » { 1645 break;
1745 » » status = REG_ESPACE; 1646 }
1746 » » break; 1647 tmp = (*result)->obj;
1747 » » } 1648 result = &tmp->left;
1748 » » tmp = (*result)->obj; 1649 STACK_PUSHX(stack, voidptr, uni->right);
1749 » » result = &tmp->left; 1650 STACK_PUSHX(stack, int, COPY_RECURSE);
1750 » » STACK_PUSHX(stack, voidptr, uni->right); 1651 STACK_PUSHX(stack, voidptr, &tmp->right);
1751 » » STACK_PUSHX(stack, int, COPY_RECURSE); 1652 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1752 » » STACK_PUSHX(stack, voidptr, &tmp->right); 1653 STACK_PUSHX(stack, voidptr, uni->left);
1753 » » STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); 1654 STACK_PUSHX(stack, int, COPY_RECURSE);
1754 » » STACK_PUSHX(stack, voidptr, uni->left); 1655 break;
1755 » » STACK_PUSHX(stack, int, COPY_RECURSE); 1656 }
1756 » » break; 1657 case CATENATION: {
1757 » } 1658 tre_catenation_t* cat = node->obj;
1758 » case CATENATION: 1659 tre_catenation_t* tmp;
1759 » { 1660 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1760 » » tre_catenation_t *cat = node->obj; 1661 if (*result == NULL) {
1761 » » tre_catenation_t *tmp; 1662 status = REG_ESPACE;
1762 » » *result = tre_ast_new_catenation(mem, cat->left, cat->right); 1663 break;
1763 » » if (*result == NULL) 1664 }
1764 » » { 1665 tmp = (*result)->obj;
1765 » » status = REG_ESPACE; 1666 tmp->left = NULL;
1766 » » break; 1667 tmp->right = NULL;
1767 » » } 1668 result = &tmp->left;
1768 » » tmp = (*result)->obj;
1769 » » tmp->left = NULL;
1770 » » tmp->right = NULL;
1771 » » result = &tmp->left;
1772 1669
1773 » » STACK_PUSHX(stack, voidptr, cat->right); 1670 STACK_PUSHX(stack, voidptr, cat->right);
1774 » » STACK_PUSHX(stack, int, COPY_RECURSE); 1671 STACK_PUSHX(stack, int, COPY_RECURSE);
1775 » » STACK_PUSHX(stack, voidptr, &tmp->right); 1672 STACK_PUSHX(stack, voidptr, &tmp->right);
1776 » » STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); 1673 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1777 » » STACK_PUSHX(stack, voidptr, cat->left); 1674 STACK_PUSHX(stack, voidptr, cat->left);
1778 » » STACK_PUSHX(stack, int, COPY_RECURSE); 1675 STACK_PUSHX(stack, int, COPY_RECURSE);
1779 » » break; 1676 break;
1780 » } 1677 }
1781 » case ITERATION: 1678 case ITERATION: {
1782 » { 1679 tre_iteration_t* iter = node->obj;
1783 » » tre_iteration_t *iter = node->obj; 1680 STACK_PUSHX(stack, voidptr, iter->arg);
1784 » » STACK_PUSHX(stack, voidptr, iter->arg); 1681 STACK_PUSHX(stack, int, COPY_RECURSE);
1785 » » STACK_PUSHX(stack, int, COPY_RECURSE); 1682 *result = tre_ast_new_iter(mem, iter->arg, iter->min, iter->max,
1786 » » *result = tre_ast_new_iter(mem, iter->arg, iter->min, 1683 iter->minimal);
1787 » » » » » iter->max, iter->minimal); 1684 if (*result == NULL) {
1788 » » if (*result == NULL) 1685 status = REG_ESPACE;
1789 » » { 1686 break;
1790 » » status = REG_ESPACE; 1687 }
1791 » » break; 1688 iter = (*result)->obj;
1792 » » } 1689 result = &iter->arg;
1793 » » iter = (*result)->obj; 1690 break;
1794 » » result = &iter->arg; 1691 }
1795 » » break; 1692 default:
1796 » } 1693 assert(0);
1797 » default: 1694 break;
1798 » assert(0); 1695 }
1799 » break; 1696 break;
1800 » }
1801 » break;
1802 » }
1803 } 1697 }
1698 }
1804 *pos_add += num_copied; 1699 *pos_add += num_copied;
1805 return status; 1700 return status;
1806 } 1701 }
1807 1702
1808 typedef enum { 1703 typedef enum { EXPAND_RECURSE, EXPAND_AFTER_ITER } tre_expand_ast_symbol_t;
1809 EXPAND_RECURSE,
1810 EXPAND_AFTER_ITER
1811 } tre_expand_ast_symbol_t;
1812 1704
1813 /* Expands each iteration node that has a finite nonzero minimum or maximum 1705 /* Expands each iteration node that has a finite nonzero minimum or maximum
1814 iteration count to a catenated sequence of copies of the node. */ 1706 iteration count to a catenated sequence of copies of the node. */
1815 static reg_errcode_t 1707 static reg_errcode_t tre_expand_ast(tre_mem_t mem,
1816 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, 1708 tre_stack_t* stack,
1817 » int *position, tre_tag_direction_t *tag_directions) 1709 tre_ast_node_t* ast,
1818 { 1710 int* position,
1711 tre_tag_direction_t* tag_directions) {
1819 reg_errcode_t status = REG_OK; 1712 reg_errcode_t status = REG_OK;
1820 int bottom = tre_stack_num_objects(stack); 1713 int bottom = tre_stack_num_objects(stack);
1821 int pos_add = 0; 1714 int pos_add = 0;
1822 int pos_add_total = 0; 1715 int pos_add_total = 0;
1823 int max_pos = 0; 1716 int max_pos = 0;
1824 int iter_depth = 0; 1717 int iter_depth = 0;
1825 1718
1826 STACK_PUSHR(stack, voidptr, ast); 1719 STACK_PUSHR(stack, voidptr, ast);
1827 STACK_PUSHR(stack, int, EXPAND_RECURSE); 1720 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1828 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 1721 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {
1829 { 1722 tre_ast_node_t* node;
1830 tre_ast_node_t *node; 1723 tre_expand_ast_symbol_t symbol;
1831 tre_expand_ast_symbol_t symbol;
1832 1724
1833 if (status != REG_OK) 1725 if (status != REG_OK)
1834 » break; 1726 break;
1835 1727
1836 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack); 1728 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1837 node = tre_stack_pop_voidptr(stack); 1729 node = tre_stack_pop_voidptr(stack);
1838 switch (symbol) 1730 switch (symbol) {
1839 » { 1731 case EXPAND_RECURSE:
1840 » case EXPAND_RECURSE: 1732 switch (node->type) {
1841 » switch (node->type) 1733 case LITERAL: {
1842 » { 1734 tre_literal_t* lit = node->obj;
1843 » case LITERAL: 1735 if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) {
1844 » { 1736 lit->position += pos_add;
1845 » » tre_literal_t *lit= node->obj; 1737 if (lit->position > max_pos)
1846 » » if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) 1738 max_pos = lit->position;
1847 » » { 1739 }
1848 » » lit->position += pos_add; 1740 break;
1849 » » if (lit->position > max_pos) 1741 }
1850 » » max_pos = lit->position; 1742 case UNION: {
1851 » » } 1743 tre_union_t* uni = node->obj;
1852 » » break; 1744 STACK_PUSHX(stack, voidptr, uni->right);
1853 » } 1745 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1854 » case UNION: 1746 STACK_PUSHX(stack, voidptr, uni->left);
1855 » { 1747 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1856 » » tre_union_t *uni = node->obj; 1748 break;
1857 » » STACK_PUSHX(stack, voidptr, uni->right); 1749 }
1858 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); 1750 case CATENATION: {
1859 » » STACK_PUSHX(stack, voidptr, uni->left); 1751 tre_catenation_t* cat = node->obj;
1860 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); 1752 STACK_PUSHX(stack, voidptr, cat->right);
1861 » » break; 1753 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1862 » } 1754 STACK_PUSHX(stack, voidptr, cat->left);
1863 » case CATENATION: 1755 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1864 » { 1756 break;
1865 » » tre_catenation_t *cat = node->obj; 1757 }
1866 » » STACK_PUSHX(stack, voidptr, cat->right); 1758 case ITERATION: {
1867 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); 1759 tre_iteration_t* iter = node->obj;
1868 » » STACK_PUSHX(stack, voidptr, cat->left); 1760 STACK_PUSHX(stack, int, pos_add);
1869 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); 1761 STACK_PUSHX(stack, voidptr, node);
1870 » » break; 1762 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1871 » } 1763 STACK_PUSHX(stack, voidptr, iter->arg);
1872 » case ITERATION: 1764 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1873 » { 1765 /* If we are going to expand this node at EXPAND_AFTER_ITER
1874 » » tre_iteration_t *iter = node->obj; 1766 then don't increase the `pos' fields of the nodes now, it
1875 » » STACK_PUSHX(stack, int, pos_add); 1767 will get done when expanding. */
1876 » » STACK_PUSHX(stack, voidptr, node); 1768 if (iter->min > 1 || iter->max > 1)
1877 » » STACK_PUSHX(stack, int, EXPAND_AFTER_ITER); 1769 pos_add = 0;
1878 » » STACK_PUSHX(stack, voidptr, iter->arg); 1770 iter_depth++;
1879 » » STACK_PUSHX(stack, int, EXPAND_RECURSE); 1771 break;
1880 » » /* If we are going to expand this node at EXPAND_AFTER_ITER 1772 }
1881 » » then don't increase the `pos' fields of the nodes now, it 1773 default:
1882 » » will get done when expanding. */ 1774 assert(0);
1883 » » if (iter->min > 1 || iter->max > 1) 1775 break;
1884 » » pos_add = 0; 1776 }
1885 » » iter_depth++; 1777 break;
1886 » » break; 1778 case EXPAND_AFTER_ITER: {
1887 » } 1779 tre_iteration_t* iter = node->obj;
1888 » default: 1780 int pos_add_last;
1889 » assert(0); 1781 pos_add = tre_stack_pop_int(stack);
1890 » break; 1782 pos_add_last = pos_add;
1891 » } 1783 if (iter->min > 1 || iter->max > 1) {
1892 » break; 1784 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1893 » case EXPAND_AFTER_ITER: 1785 int j;
1894 » { 1786 int pos_add_save = pos_add;
1895 » tre_iteration_t *iter = node->obj;
1896 » int pos_add_last;
1897 » pos_add = tre_stack_pop_int(stack);
1898 » pos_add_last = pos_add;
1899 » if (iter->min > 1 || iter->max > 1)
1900 » {
1901 » » tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1902 » » int j;
1903 » » int pos_add_save = pos_add;
1904 1787
1905 » » /* Create a catenated sequence of copies of the node. */ 1788 /* Create a catenated sequence of copies of the node. */
1906 » » for (j = 0; j < iter->min; j++) 1789 for (j = 0; j < iter->min; j++) {
1907 » » { 1790 tre_ast_node_t* copy;
1908 » » tre_ast_node_t *copy; 1791 /* Remove tags from all but the last copy. */
1909 » » /* Remove tags from all but the last copy. */ 1792 int flags = ((j + 1 < iter->min) ? COPY_REMOVE_TAGS
1910 » » int flags = ((j + 1 < iter->min) 1793 : COPY_MAXIMIZE_FIRST_TAG);
1911 » » » » ? COPY_REMOVE_TAGS 1794 pos_add_save = pos_add;
1912 » » » » : COPY_MAXIMIZE_FIRST_TAG); 1795 status = tre_copy_ast(mem, stack, iter->arg, flags, &pos_add,
1913 » » pos_add_save = pos_add; 1796 tag_directions, &copy, &max_pos);
1914 » » status = tre_copy_ast(mem, stack, iter->arg, flags, 1797 if (status != REG_OK)
1915 » » » » » &pos_add, tag_directions, &copy, 1798 return status;
1916 » » » » » &max_pos); 1799 if (seq1 != NULL)
1917 » » if (status != REG_OK) 1800 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1918 » » return status; 1801 else
1919 » » if (seq1 != NULL) 1802 seq1 = copy;
1920 » » seq1 = tre_ast_new_catenation(mem, seq1, copy); 1803 if (seq1 == NULL)
1921 » » else 1804 return REG_ESPACE;
1922 » » seq1 = copy; 1805 }
1923 » » if (seq1 == NULL)
1924 » » return REG_ESPACE;
1925 » » }
1926 1806
1927 » » if (iter->max == -1) 1807 if (iter->max == -1) {
1928 » » { 1808 /* No upper limit. */
1929 » » /* No upper limit. */ 1809 pos_add_save = pos_add;
1930 » » pos_add_save = pos_add; 1810 status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL,
1931 » » status = tre_copy_ast(mem, stack, iter->arg, 0, 1811 &seq2, &max_pos);
1932 » » » » » &pos_add, NULL, &seq2, &max_pos); 1812 if (status != REG_OK)
1933 » » if (status != REG_OK) 1813 return status;
1934 » » return status; 1814 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1935 » » seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); 1815 if (seq2 == NULL)
1936 » » if (seq2 == NULL) 1816 return REG_ESPACE;
1937 » » return REG_ESPACE; 1817 } else {
1938 » » } 1818 for (j = iter->min; j < iter->max; j++) {
1939 » » else 1819 tre_ast_node_t *tmp, *copy;
1940 » » { 1820 pos_add_save = pos_add;
1941 » » for (j = iter->min; j < iter->max; j++) 1821 status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL,
1942 » » { 1822 &copy, &max_pos);
1943 » » » tre_ast_node_t *tmp, *copy; 1823 if (status != REG_OK)
1944 » » » pos_add_save = pos_add; 1824 return status;
1945 » » » status = tre_copy_ast(mem, stack, iter->arg, 0, 1825 if (seq2 != NULL)
1946 » » » » » &pos_add, NULL, &copy, &max_pos); 1826 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1947 » » » if (status != REG_OK) 1827 else
1948 » » » return status; 1828 seq2 = copy;
1949 » » » if (seq2 != NULL) 1829 if (seq2 == NULL)
1950 » » » seq2 = tre_ast_new_catenation(mem, copy, seq2); 1830 return REG_ESPACE;
1951 » » » else 1831 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1952 » » » seq2 = copy; 1832 if (tmp == NULL)
1953 » » » if (seq2 == NULL) 1833 return REG_ESPACE;
1954 » » » return REG_ESPACE; 1834 seq2 = tre_ast_new_union(mem, tmp, seq2);
1955 » » » tmp = tre_ast_new_literal(mem, EMPTY, -1, -1); 1835 if (seq2 == NULL)
1956 » » » if (tmp == NULL) 1836 return REG_ESPACE;
1957 » » » return REG_ESPACE; 1837 }
1958 » » » seq2 = tre_ast_new_union(mem, tmp, seq2); 1838 }
1959 » » » if (seq2 == NULL)
1960 » » » return REG_ESPACE;
1961 » » }
1962 » » }
1963 1839
1964 » » pos_add = pos_add_save; 1840 pos_add = pos_add_save;
1965 » » if (seq1 == NULL) 1841 if (seq1 == NULL)
1966 » » seq1 = seq2; 1842 seq1 = seq2;
1967 » » else if (seq2 != NULL) 1843 else if (seq2 != NULL)
1968 » » seq1 = tre_ast_new_catenation(mem, seq1, seq2); 1844 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1969 » » if (seq1 == NULL) 1845 if (seq1 == NULL)
1970 » » return REG_ESPACE; 1846 return REG_ESPACE;
1971 » » node->obj = seq1->obj; 1847 node->obj = seq1->obj;
1972 » » node->type = seq1->type; 1848 node->type = seq1->type;
1973 » } 1849 }
1974 1850
1975 » iter_depth--; 1851 iter_depth--;
1976 » pos_add_total += pos_add - pos_add_last; 1852 pos_add_total += pos_add - pos_add_last;
1977 » if (iter_depth == 0) 1853 if (iter_depth == 0)
1978 » pos_add = pos_add_total; 1854 pos_add = pos_add_total;
1979 1855
1980 » break; 1856 break;
1981 » } 1857 }
1982 » default: 1858 default:
1983 » assert(0); 1859 assert(0);
1984 » break; 1860 break;
1985 » }
1986 } 1861 }
1862 }
1987 1863
1988 *position += pos_add_total; 1864 *position += pos_add_total;
1989 1865
1990 /* `max_pos' should never be larger than `*position' if the above 1866 /* `max_pos' should never be larger than `*position' if the above
1991 code works, but just an extra safeguard let's make sure 1867 code works, but just an extra safeguard let's make sure
1992 `*position' is set large enough so enough memory will be 1868 `*position' is set large enough so enough memory will be
1993 allocated for the transition table. */ 1869 allocated for the transition table. */
1994 if (max_pos > *position) 1870 if (max_pos > *position)
1995 *position = max_pos; 1871 *position = max_pos;
1996 1872
1997 return status; 1873 return status;
1998 } 1874 }
1999 1875
2000 static tre_pos_and_tags_t * 1876 static tre_pos_and_tags_t* tre_set_empty(tre_mem_t mem) {
2001 tre_set_empty(tre_mem_t mem) 1877 tre_pos_and_tags_t* new_set;
2002 {
2003 tre_pos_and_tags_t *new_set;
2004 1878
2005 new_set = tre_mem_calloc(mem, sizeof(*new_set)); 1879 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2006 if (new_set == NULL) 1880 if (new_set == NULL)
2007 return NULL; 1881 return NULL;
2008 1882
2009 new_set[0].position = -1; 1883 new_set[0].position = -1;
2010 new_set[0].code_min = -1; 1884 new_set[0].code_min = -1;
2011 new_set[0].code_max = -1; 1885 new_set[0].code_max = -1;
2012 1886
2013 return new_set; 1887 return new_set;
2014 } 1888 }
2015 1889
2016 static tre_pos_and_tags_t * 1890 static tre_pos_and_tags_t* tre_set_one(tre_mem_t mem,
2017 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max, 1891 int position,
2018 » tre_ctype_t class, tre_ctype_t *neg_classes, int backref) 1892 int code_min,
2019 { 1893 int code_max,
2020 tre_pos_and_tags_t *new_set; 1894 tre_ctype_t class,
1895 tre_ctype_t* neg_classes,
1896 int backref) {
1897 tre_pos_and_tags_t* new_set;
2021 1898
2022 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); 1899 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2023 if (new_set == NULL) 1900 if (new_set == NULL)
2024 return NULL; 1901 return NULL;
2025 1902
2026 new_set[0].position = position; 1903 new_set[0].position = position;
2027 new_set[0].code_min = code_min; 1904 new_set[0].code_min = code_min;
2028 new_set[0].code_max = code_max; 1905 new_set[0].code_max = code_max;
2029 new_set[0].class = class; 1906 new_set[0].class = class;
2030 new_set[0].neg_classes = neg_classes; 1907 new_set[0].neg_classes = neg_classes;
2031 new_set[0].backref = backref; 1908 new_set[0].backref = backref;
2032 new_set[1].position = -1; 1909 new_set[1].position = -1;
2033 new_set[1].code_min = -1; 1910 new_set[1].code_min = -1;
2034 new_set[1].code_max = -1; 1911 new_set[1].code_max = -1;
2035 1912
2036 return new_set; 1913 return new_set;
2037 } 1914 }
2038 1915
2039 static tre_pos_and_tags_t * 1916 static tre_pos_and_tags_t* tre_set_union(tre_mem_t mem,
2040 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, 1917 tre_pos_and_tags_t* set1,
2041 » int *tags, int assertions) 1918 tre_pos_and_tags_t* set2,
2042 { 1919 int* tags,
1920 int assertions) {
2043 int s1, s2, i, j; 1921 int s1, s2, i, j;
2044 tre_pos_and_tags_t *new_set; 1922 tre_pos_and_tags_t* new_set;
2045 int *new_tags; 1923 int* new_tags;
2046 int num_tags; 1924 int num_tags;
2047 1925
2048 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++); 1926 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++)
2049 for (s1 = 0; set1[s1].position >= 0; s1++); 1927 ;
2050 for (s2 = 0; set2[s2].position >= 0; s2++); 1928 for (s1 = 0; set1[s1].position >= 0; s1++)
1929 ;
1930 for (s2 = 0; set2[s2].position >= 0; s2++)
1931 ;
2051 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); 1932 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2052 if (!new_set ) 1933 if (!new_set)
2053 return NULL; 1934 return NULL;
2054 1935
2055 for (s1 = 0; set1[s1].position >= 0; s1++) 1936 for (s1 = 0; set1[s1].position >= 0; s1++) {
2056 { 1937 new_set[s1].position = set1[s1].position;
2057 new_set[s1].position = set1[s1].position; 1938 new_set[s1].code_min = set1[s1].code_min;
2058 new_set[s1].code_min = set1[s1].code_min; 1939 new_set[s1].code_max = set1[s1].code_max;
2059 new_set[s1].code_max = set1[s1].code_max; 1940 new_set[s1].assertions = set1[s1].assertions | assertions;
2060 new_set[s1].assertions = set1[s1].assertions | assertions; 1941 new_set[s1].class = set1[s1].class;
2061 new_set[s1].class = set1[s1].class; 1942 new_set[s1].neg_classes = set1[s1].neg_classes;
2062 new_set[s1].neg_classes = set1[s1].neg_classes; 1943 new_set[s1].backref = set1[s1].backref;
2063 new_set[s1].backref = set1[s1].backref; 1944 if (set1[s1].tags == NULL && tags == NULL)
2064 if (set1[s1].tags == NULL && tags == NULL) 1945 new_set[s1].tags = NULL;
2065 » new_set[s1].tags = NULL; 1946 else {
2066 else 1947 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++)
2067 » { 1948 ;
2068 » for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++); 1949 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) * (i + num_tags + 1)));
2069 » new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) 1950 if (new_tags == NULL)
2070 » » » » » * (i + num_tags + 1))); 1951 return NULL;
2071 » if (new_tags == NULL) 1952 for (j = 0; j < i; j++)
2072 » return NULL; 1953 new_tags[j] = set1[s1].tags[j];
2073 » for (j = 0; j < i; j++) 1954 for (i = 0; i < num_tags; i++)
2074 » new_tags[j] = set1[s1].tags[j]; 1955 new_tags[j + i] = tags[i];
2075 » for (i = 0; i < num_tags; i++) 1956 new_tags[j + i] = -1;
2076 » new_tags[j + i] = tags[i]; 1957 new_set[s1].tags = new_tags;
2077 » new_tags[j + i] = -1;
2078 » new_set[s1].tags = new_tags;
2079 » }
2080 } 1958 }
1959 }
2081 1960
2082 for (s2 = 0; set2[s2].position >= 0; s2++) 1961 for (s2 = 0; set2[s2].position >= 0; s2++) {
2083 { 1962 new_set[s1 + s2].position = set2[s2].position;
2084 new_set[s1 + s2].position = set2[s2].position; 1963 new_set[s1 + s2].code_min = set2[s2].code_min;
2085 new_set[s1 + s2].code_min = set2[s2].code_min; 1964 new_set[s1 + s2].code_max = set2[s2].code_max;
2086 new_set[s1 + s2].code_max = set2[s2].code_max; 1965 /* XXX - why not | assertions here as well? */
2087 /* XXX - why not | assertions here as well? */ 1966 new_set[s1 + s2].assertions = set2[s2].assertions;
2088 new_set[s1 + s2].assertions = set2[s2].assertions; 1967 new_set[s1 + s2].class = set2[s2].class;
2089 new_set[s1 + s2].class = set2[s2].class; 1968 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2090 new_set[s1 + s2].neg_classes = set2[s2].neg_classes; 1969 new_set[s1 + s2].backref = set2[s2].backref;
2091 new_set[s1 + s2].backref = set2[s2].backref; 1970 if (set2[s2].tags == NULL)
2092 if (set2[s2].tags == NULL) 1971 new_set[s1 + s2].tags = NULL;
2093 » new_set[s1 + s2].tags = NULL; 1972 else {
2094 else 1973 for (i = 0; set2[s2].tags[i] >= 0; i++)
2095 » { 1974 ;
2096 » for (i = 0; set2[s2].tags[i] >= 0; i++); 1975 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2097 » new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1)); 1976 if (new_tags == NULL)
2098 » if (new_tags == NULL) 1977 return NULL;
2099 » return NULL; 1978 for (j = 0; j < i; j++)
2100 » for (j = 0; j < i; j++) 1979 new_tags[j] = set2[s2].tags[j];
2101 » new_tags[j] = set2[s2].tags[j]; 1980 new_tags[j] = -1;
2102 » new_tags[j] = -1; 1981 new_set[s1 + s2].tags = new_tags;
2103 » new_set[s1 + s2].tags = new_tags;
2104 » }
2105 } 1982 }
1983 }
2106 new_set[s1 + s2].position = -1; 1984 new_set[s1 + s2].position = -1;
2107 return new_set; 1985 return new_set;
2108 } 1986 }
2109 1987
2110 /* Finds the empty path through `node' which is the one that should be 1988 /* Finds the empty path through `node' which is the one that should be
2111 taken according to POSIX.2 rules, and adds the tags on that path to 1989 taken according to POSIX.2 rules, and adds the tags on that path to
2112 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is 1990 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2113 set to the number of tags seen on the path. */ 1991 set to the number of tags seen on the path. */
2114 static reg_errcode_t 1992 static reg_errcode_t tre_match_empty(tre_stack_t* stack,
2115 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags, 1993 tre_ast_node_t* node,
2116 » » int *assertions, int *num_tags_seen) 1994 int* tags,
2117 { 1995 int* assertions,
2118 tre_literal_t *lit; 1996 int* num_tags_seen) {
2119 tre_union_t *uni; 1997 tre_literal_t* lit;
2120 tre_catenation_t *cat; 1998 tre_union_t* uni;
2121 tre_iteration_t *iter; 1999 tre_catenation_t* cat;
2000 tre_iteration_t* iter;
2122 int i; 2001 int i;
2123 int bottom = tre_stack_num_objects(stack); 2002 int bottom = tre_stack_num_objects(stack);
2124 reg_errcode_t status = REG_OK; 2003 reg_errcode_t status = REG_OK;
2125 if (num_tags_seen) 2004 if (num_tags_seen)
2126 *num_tags_seen = 0; 2005 *num_tags_seen = 0;
2127 2006
2128 status = tre_stack_push_voidptr(stack, node); 2007 status = tre_stack_push_voidptr(stack, node);
2129 2008
2130 /* Walk through the tree recursively. */ 2009 /* Walk through the tree recursively. */
2131 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) 2010 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {
2132 { 2011 node = tre_stack_pop_voidptr(stack);
2133 node = tre_stack_pop_voidptr(stack);
2134 2012
2135 switch (node->type) 2013 switch (node->type) {
2136 » { 2014 case LITERAL:
2137 » case LITERAL: 2015 lit = (tre_literal_t*)node->obj;
2138 » lit = (tre_literal_t *)node->obj; 2016 switch (lit->code_min) {
2139 » switch (lit->code_min) 2017 case TAG:
2140 » { 2018 if (lit->code_max >= 0) {
2141 » case TAG: 2019 if (tags != NULL) {
2142 » if (lit->code_max >= 0) 2020 /* Add the tag to `tags'. */
2143 » » { 2021 for (i = 0; tags[i] >= 0; i++)
2144 » » if (tags != NULL) 2022 if (tags[i] == lit->code_max)
2145 » » { 2023 break;
2146 » » /* Add the tag to `tags'. */ 2024 if (tags[i] < 0) {
2147 » » for (i = 0; tags[i] >= 0; i++) 2025 tags[i] = lit->code_max;
2148 » » » if (tags[i] == lit->code_max) 2026 tags[i + 1] = -1;
2149 » » » break; 2027 }
2150 » » if (tags[i] < 0) 2028 }
2151 » » » { 2029 if (num_tags_seen)
2152 » » » tags[i] = lit->code_max; 2030 (*num_tags_seen)++;
2153 » » » tags[i + 1] = -1; 2031 }
2154 » » » } 2032 break;
2155 » » } 2033 case ASSERTION:
2156 » » if (num_tags_seen) 2034 assert(lit->code_max >= 1 || lit->code_max <= ASSERT_LAST);
2157 » » (*num_tags_seen)++; 2035 if (assertions != NULL)
2158 » » } 2036 *assertions |= lit->code_max;
2159 » break; 2037 break;
2160 » case ASSERTION: 2038 case EMPTY:
2161 » assert(lit->code_max >= 1 2039 break;
2162 » » || lit->code_max <= ASSERT_LAST); 2040 default:
2163 » if (assertions != NULL) 2041 assert(0);
2164 » » *assertions |= lit->code_max; 2042 break;
2165 » break; 2043 }
2166 » case EMPTY: 2044 break;
2167 » break;
2168 » default:
2169 » assert(0);
2170 » break;
2171 » }
2172 » break;
2173 2045
2174 » case UNION: 2046 case UNION:
2175 » /* Subexpressions starting earlier take priority over ones 2047 /* Subexpressions starting earlier take priority over ones
2176 » starting later, so we prefer the left subexpression over the 2048 starting later, so we prefer the left subexpression over the
2177 » right subexpression. */ 2049 right subexpression. */
2178 » uni = (tre_union_t *)node->obj; 2050 uni = (tre_union_t*)node->obj;
2179 » if (uni->left->nullable) 2051 if (uni->left->nullable)
2180 » STACK_PUSHX(stack, voidptr, uni->left) 2052 STACK_PUSHX(stack, voidptr, uni->left)
2181 » else if (uni->right->nullable) 2053 else if (uni->right->nullable)
2182 » STACK_PUSHX(stack, voidptr, uni->right) 2054 STACK_PUSHX(stack, voidptr, uni->right)
2183 » else 2055 else
2184 » assert(0); 2056 assert(0);
2185 » break; 2057 break;
2186 2058
2187 » case CATENATION: 2059 case CATENATION:
2188 » /* The path must go through both children. */ 2060 /* The path must go through both children. */
2189 » cat = (tre_catenation_t *)node->obj; 2061 cat = (tre_catenation_t*)node->obj;
2190 » assert(cat->left->nullable); 2062 assert(cat->left->nullable);
2191 » assert(cat->right->nullable); 2063 assert(cat->right->nullable);
2192 » STACK_PUSHX(stack, voidptr, cat->left); 2064 STACK_PUSHX(stack, voidptr, cat->left);
2193 » STACK_PUSHX(stack, voidptr, cat->right); 2065 STACK_PUSHX(stack, voidptr, cat->right);
2194 » break; 2066 break;
2195 2067
2196 » case ITERATION: 2068 case ITERATION:
2197 » /* A match with an empty string is preferred over no match at 2069 /* A match with an empty string is preferred over no match at
2198 » all, so we go through the argument if possible. */ 2070 all, so we go through the argument if possible. */
2199 » iter = (tre_iteration_t *)node->obj; 2071 iter = (tre_iteration_t*)node->obj;
2200 » if (iter->arg->nullable) 2072 if (iter->arg->nullable)
2201 » STACK_PUSHX(stack, voidptr, iter->arg); 2073 STACK_PUSHX(stack, voidptr, iter->arg);
2202 » break; 2074 break;
2203 2075
2204 » default: 2076 default:
2205 » assert(0); 2077 assert(0);
2206 » break; 2078 break;
2207 » }
2208 } 2079 }
2080 }
2209 2081
2210 return status; 2082 return status;
2211 } 2083 }
2212 2084
2213
2214 typedef enum { 2085 typedef enum {
2215 NFL_RECURSE, 2086 NFL_RECURSE,
2216 NFL_POST_UNION, 2087 NFL_POST_UNION,
2217 NFL_POST_CATENATION, 2088 NFL_POST_CATENATION,
2218 NFL_POST_ITERATION 2089 NFL_POST_ITERATION
2219 } tre_nfl_stack_symbol_t; 2090 } tre_nfl_stack_symbol_t;
2220 2091
2221
2222 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for 2092 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2223 the nodes of the AST `tree'. */ 2093 the nodes of the AST `tree'. */
2224 static reg_errcode_t 2094 static reg_errcode_t tre_compute_nfl(tre_mem_t mem,
2225 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) 2095 tre_stack_t* stack,
2226 { 2096 tre_ast_node_t* tree) {
2227 int bottom = tre_stack_num_objects(stack); 2097 int bottom = tre_stack_num_objects(stack);
2228 2098
2229 STACK_PUSHR(stack, voidptr, tree); 2099 STACK_PUSHR(stack, voidptr, tree);
2230 STACK_PUSHR(stack, int, NFL_RECURSE); 2100 STACK_PUSHR(stack, int, NFL_RECURSE);
2231 2101
2232 while (tre_stack_num_objects(stack) > bottom) 2102 while (tre_stack_num_objects(stack) > bottom) {
2233 { 2103 tre_nfl_stack_symbol_t symbol;
2234 tre_nfl_stack_symbol_t symbol; 2104 tre_ast_node_t* node;
2235 tre_ast_node_t *node; 2105
2236 2106 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2237 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack); 2107 node = tre_stack_pop_voidptr(stack);
2238 node = tre_stack_pop_voidptr(stack); 2108 switch (symbol) {
2239 switch (symbol) 2109 case NFL_RECURSE:
2240 { 2110 switch (node->type) {
2241 case NFL_RECURSE: 2111 case LITERAL: {
2242 switch (node->type) 2112 tre_literal_t* lit = (tre_literal_t*)node->obj;
2243 { 2113 if (IS_BACKREF(lit)) {
2244 case LITERAL: 2114 /* Back references: nullable = false, firstpos = {i},
2245 { 2115 lastpos = {i}. */
2246 tre_literal_t *lit = (tre_literal_t *)node->obj; 2116 node->nullable = 0;
2247 if (IS_BACKREF(lit)) 2117 node->firstpos =
2248 { 2118 tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);
2249 /* Back references: nullable = false, firstpos = {i}, 2119 if (!node->firstpos)
2250 lastpos = {i}. */ 2120 return REG_ESPACE;
2251 node->nullable = 0; 2121 node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX,
2252 node->firstpos = tre_set_one(mem, lit->position, 0, 2122 0, NULL, (int)lit->code_max);
2253 TRE_CHAR_MAX, 0, NULL, -1); 2123 if (!node->lastpos)
2254 if (!node->firstpos) 2124 return REG_ESPACE;
2255 return REG_ESPACE; 2125 } else if (lit->code_min < 0) {
2256 node->lastpos = tre_set_one(mem, lit->position, 0, 2126 /* Tags, empty strings, params, and zero width assertions:
2257 TRE_CHAR_MAX, 0, NULL, 2127 nullable = true, firstpos = {}, and lastpos = {}. */
2258 (int)lit->code_max); 2128 node->nullable = 1;
2259 if (!node->lastpos) 2129 node->firstpos = tre_set_empty(mem);
2260 return REG_ESPACE; 2130 if (!node->firstpos)
2261 } 2131 return REG_ESPACE;
2262 else if (lit->code_min < 0) 2132 node->lastpos = tre_set_empty(mem);
2263 { 2133 if (!node->lastpos)
2264 /* Tags, empty strings, params, and zero width assertions: 2134 return REG_ESPACE;
2265 nullable = true, firstpos = {}, and lastpos = {}. */ 2135 } else {
2266 node->nullable = 1; 2136 /* Literal at position i: nullable = false, firstpos = {i},
2267 node->firstpos = tre_set_empty(mem); 2137 lastpos = {i}. */
2268 if (!node->firstpos) 2138 node->nullable = 0;
2269 return REG_ESPACE; 2139 node->firstpos =
2270 node->lastpos = tre_set_empty(mem); 2140 tre_set_one(mem, lit->position, (int)lit->code_min,
2271 if (!node->lastpos) 2141 (int)lit->code_max, 0, NULL, -1);
2272 return REG_ESPACE; 2142 if (!node->firstpos)
2273 } 2143 return REG_ESPACE;
2274 else 2144 node->lastpos = tre_set_one(
2275 { 2145 mem, lit->position, (int)lit->code_min, (int)lit->code_max,
2276 /* Literal at position i: nullable = false, firstpos = {i}, 2146 lit->class, lit->neg_classes, -1);
2277 lastpos = {i}. */ 2147 if (!node->lastpos)
2278 node->nullable = 0; 2148 return REG_ESPACE;
2279 node->firstpos = 2149 }
2280 tre_set_one(mem, lit->position, (int)lit->code_min, 2150 break;
2281 (int)lit->code_max, 0, NULL, -1); 2151 }
2282 if (!node->firstpos) 2152
2283 return REG_ESPACE; 2153 case UNION:
2284 node->lastpos = tre_set_one(mem, lit->position, 2154 /* Compute the attributes for the two subtrees, and after that
2285 (int)lit->code_min, 2155 for this node. */
2286 (int)lit->code_max, 2156 STACK_PUSHR(stack, voidptr, node);
2287 lit->class, lit->neg_classes, 2157 STACK_PUSHR(stack, int, NFL_POST_UNION);
2288 -1); 2158 STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->right);
2289 if (!node->lastpos) 2159 STACK_PUSHR(stack, int, NFL_RECURSE);
2290 return REG_ESPACE; 2160 STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->left);
2291 } 2161 STACK_PUSHR(stack, int, NFL_RECURSE);
2292 break; 2162 break;
2293 } 2163
2294 2164 case CATENATION:
2295 case UNION: 2165 /* Compute the attributes for the two subtrees, and after that
2296 /* Compute the attributes for the two subtrees, and after that 2166 for this node. */
2297 for this node. */ 2167 STACK_PUSHR(stack, voidptr, node);
2298 STACK_PUSHR(stack, voidptr, node); 2168 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2299 STACK_PUSHR(stack, int, NFL_POST_UNION); 2169 STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->right);
2300 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right); 2170 STACK_PUSHR(stack, int, NFL_RECURSE);
2301 STACK_PUSHR(stack, int, NFL_RECURSE); 2171 STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->left);
2302 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left); 2172 STACK_PUSHR(stack, int, NFL_RECURSE);
2303 STACK_PUSHR(stack, int, NFL_RECURSE); 2173 break;
2304 break; 2174
2305 2175 case ITERATION:
2306 case CATENATION: 2176 /* Compute the attributes for the subtree, and after that for
2307 /* Compute the attributes for the two subtrees, and after that 2177 this node. */
2308 for this node. */ 2178 STACK_PUSHR(stack, voidptr, node);
2309 STACK_PUSHR(stack, voidptr, node); 2179 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2310 STACK_PUSHR(stack, int, NFL_POST_CATENATION); 2180 STACK_PUSHR(stack, voidptr, ((tre_iteration_t*)node->obj)->arg);
2311 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right ); 2181 STACK_PUSHR(stack, int, NFL_RECURSE);
2312 STACK_PUSHR(stack, int, NFL_RECURSE); 2182 break;
2313 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left) ; 2183 }
2314 STACK_PUSHR(stack, int, NFL_RECURSE); 2184 break; /* end case: NFL_RECURSE */
2315 break; 2185
2316 2186 case NFL_POST_UNION: {
2317 case ITERATION: 2187 tre_union_t* uni = (tre_union_t*)node->obj;
2318 /* Compute the attributes for the subtree, and after that for 2188 node->nullable = uni->left->nullable || uni->right->nullable;
2319 this node. */ 2189 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2320 STACK_PUSHR(stack, voidptr, node); 2190 uni->right->firstpos, NULL, 0);
2321 STACK_PUSHR(stack, int, NFL_POST_ITERATION); 2191 if (!node->firstpos)
2322 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg); 2192 return REG_ESPACE;
2323 STACK_PUSHR(stack, int, NFL_RECURSE); 2193 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2324 break; 2194 uni->right->lastpos, NULL, 0);
2325 } 2195 if (!node->lastpos)
2326 break; /* end case: NFL_RECURSE */ 2196 return REG_ESPACE;
2327 2197 break;
2328 case NFL_POST_UNION: 2198 }
2329 { 2199
2330 tre_union_t *uni = (tre_union_t *)node->obj; 2200 case NFL_POST_ITERATION: {
2331 node->nullable = uni->left->nullable || uni->right->nullable; 2201 tre_iteration_t* iter = (tre_iteration_t*)node->obj;
2332 node->firstpos = tre_set_union(mem, uni->left->firstpos, 2202
2333 uni->right->firstpos, NULL, 0); 2203 if (iter->min == 0 || iter->arg->nullable)
2334 if (!node->firstpos) 2204 node->nullable = 1;
2335 return REG_ESPACE; 2205 else
2336 node->lastpos = tre_set_union(mem, uni->left->lastpos, 2206 node->nullable = 0;
2337 uni->right->lastpos, NULL, 0); 2207 node->firstpos = iter->arg->firstpos;
2338 if (!node->lastpos) 2208 node->lastpos = iter->arg->lastpos;
2339 return REG_ESPACE; 2209 break;
2340 break; 2210 }
2341 } 2211
2342 2212 case NFL_POST_CATENATION: {
2343 case NFL_POST_ITERATION: 2213 int num_tags, *tags, assertions;
2344 { 2214 reg_errcode_t status;
2345 tre_iteration_t *iter = (tre_iteration_t *)node->obj; 2215 tre_catenation_t* cat = node->obj;
2346 2216 node->nullable = cat->left->nullable && cat->right->nullable;
2347 if (iter->min == 0 || iter->arg->nullable) 2217
2348 node->nullable = 1; 2218 /* Compute firstpos. */
2349 else 2219 if (cat->left->nullable) {
2350 node->nullable = 0; 2220 /* The left side matches the empty string. Make a first pass
2351 node->firstpos = iter->arg->firstpos; 2221 with tre_match_empty() to get the number of tags and
2352 node->lastpos = iter->arg->lastpos; 2222 parameters. */
2353 break; 2223 status = tre_match_empty(stack, cat->left, NULL, NULL, &num_tags);
2354 } 2224 if (status != REG_OK)
2355 2225 return status;
2356 case NFL_POST_CATENATION: 2226 /* Allocate arrays for the tags and parameters. */
2357 { 2227 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2358 int num_tags, *tags, assertions; 2228 if (!tags)
2359 reg_errcode_t status; 2229 return REG_ESPACE;
2360 tre_catenation_t *cat = node->obj; 2230 tags[0] = -1;
2361 node->nullable = cat->left->nullable && cat->right->nullable; 2231 assertions = 0;
2362 2232 /* Second pass with tre_mach_empty() to get the list of
2363 /* Compute firstpos. */ 2233 tags and parameters. */
2364 if (cat->left->nullable) 2234 status = tre_match_empty(stack, cat->left, tags, &assertions, NULL);
2365 { 2235 if (status != REG_OK) {
2366 /* The left side matches the empty string. Make a first pass 2236 xfree(tags);
2367 with tre_match_empty() to get the number of tags and 2237 return status;
2368 parameters. */ 2238 }
2369 status = tre_match_empty(stack, cat->left, 2239 node->firstpos = tre_set_union(mem, cat->right->firstpos,
2370 NULL, NULL, &num_tags); 2240 cat->left->firstpos, tags, assertions);
2371 if (status != REG_OK) 2241 xfree(tags);
2372 return status; 2242 if (!node->firstpos)
2373 /* Allocate arrays for the tags and parameters. */ 2243 return REG_ESPACE;
2374 tags = xmalloc(sizeof(*tags) * (num_tags + 1)); 2244 } else {
2375 if (!tags) 2245 node->firstpos = cat->left->firstpos;
2376 return REG_ESPACE; 2246 }
2377 tags[0] = -1; 2247
2378 assertions = 0; 2248 /* Compute lastpos. */
2379 /* Second pass with tre_mach_empty() to get the list of 2249 if (cat->right->nullable) {
2380 tags and parameters. */ 2250 /* The right side matches the empty string. Make a first pass
2381 status = tre_match_empty(stack, cat->left, tags, 2251 with tre_match_empty() to get the number of tags and
2382 &assertions, NULL); 2252 parameters. */
2383 if (status != REG_OK) 2253 status = tre_match_empty(stack, cat->right, NULL, NULL, &num_tags);
2384 { 2254 if (status != REG_OK)
2385 xfree(tags); 2255 return status;
2386 return status; 2256 /* Allocate arrays for the tags and parameters. */
2387 } 2257 tags = xmalloc(sizeof(int) * (num_tags + 1));
2388 node->firstpos = 2258 if (!tags)
2389 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos, 2259 return REG_ESPACE;
2390 tags, assertions); 2260 tags[0] = -1;
2391 xfree(tags); 2261 assertions = 0;
2392 if (!node->firstpos) 2262 /* Second pass with tre_mach_empty() to get the list of
2393 return REG_ESPACE; 2263 tags and parameters. */
2394 } 2264 status = tre_match_empty(stack, cat->right, tags, &assertions, NULL);
2395 else 2265 if (status != REG_OK) {
2396 { 2266 xfree(tags);
2397 node->firstpos = cat->left->firstpos; 2267 return status;
2398 } 2268 }
2399 2269 node->lastpos = tre_set_union(mem, cat->left->lastpos,
2400 /* Compute lastpos. */ 2270 cat->right->lastpos, tags, assertions);
2401 if (cat->right->nullable) 2271 xfree(tags);
2402 { 2272 if (!node->lastpos)
2403 /* The right side matches the empty string. Make a first pass 2273 return REG_ESPACE;
2404 with tre_match_empty() to get the number of tags and 2274 } else {
2405 parameters. */ 2275 node->lastpos = cat->right->lastpos;
2406 status = tre_match_empty(stack, cat->right, 2276 }
2407 NULL, NULL, &num_tags); 2277 break;
2408 if (status != REG_OK) 2278 }
2409 return status; 2279
2410 /* Allocate arrays for the tags and parameters. */ 2280 default:
2411 tags = xmalloc(sizeof(int) * (num_tags + 1)); 2281 assert(0);
2412 if (!tags) 2282 break;
2413 return REG_ESPACE;
2414 tags[0] = -1;
2415 assertions = 0;
2416 /* Second pass with tre_mach_empty() to get the list of
2417 tags and parameters. */
2418 status = tre_match_empty(stack, cat->right, tags,
2419 &assertions, NULL);
2420 if (status != REG_OK)
2421 {
2422 xfree(tags);
2423 return status;
2424 }
2425 node->lastpos =
2426 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2427 tags, assertions);
2428 xfree(tags);
2429 if (!node->lastpos)
2430 return REG_ESPACE;
2431 }
2432 else
2433 {
2434 node->lastpos = cat->right->lastpos;
2435 }
2436 break;
2437 }
2438
2439 default:
2440 assert(0);
2441 break;
2442 }
2443 } 2283 }
2284 }
2444 2285
2445 return REG_OK; 2286 return REG_OK;
2446 } 2287 }
2447 2288
2448
2449 /* Adds a transition from each position in `p1' to each position in `p2'. */ 2289 /* Adds a transition from each position in `p1' to each position in `p2'. */
2450 static reg_errcode_t 2290 static reg_errcode_t tre_make_trans(tre_pos_and_tags_t* p1,
2451 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, 2291 tre_pos_and_tags_t* p2,
2452 » tre_tnfa_transition_t *transitions, 2292 tre_tnfa_transition_t* transitions,
2453 » int *counts, int *offs) 2293 int* counts,
2454 { 2294 int* offs) {
2455 tre_pos_and_tags_t *orig_p2 = p2; 2295 tre_pos_and_tags_t* orig_p2 = p2;
2456 tre_tnfa_transition_t *trans; 2296 tre_tnfa_transition_t* trans;
2457 int i, j, k, l, dup, prev_p2_pos; 2297 int i, j, k, l, dup, prev_p2_pos;
2458 2298
2459 if (transitions != NULL) 2299 if (transitions != NULL)
2460 while (p1->position >= 0) 2300 while (p1->position >= 0) {
2461 { 2301 p2 = orig_p2;
2462 » p2 = orig_p2; 2302 prev_p2_pos = -1;
2463 » prev_p2_pos = -1; 2303 while (p2->position >= 0) {
2464 » while (p2->position >= 0) 2304 /* Optimization: if this position was already handled, skip it. */
2465 » { 2305 if (p2->position == prev_p2_pos) {
2466 » /* Optimization: if this position was already handled, skip it. */ 2306 p2++;
2467 » if (p2->position == prev_p2_pos) 2307 continue;
2468 » { 2308 }
2469 » » p2++; 2309 prev_p2_pos = p2->position;
2470 » » continue; 2310 /* Set `trans' to point to the next unused transition from
2471 » } 2311 position `p1->position'. */
2472 » prev_p2_pos = p2->position; 2312 trans = transitions + offs[p1->position];
2473 » /* Set `trans' to point to the next unused transition from 2313 while (trans->state != NULL) {
2474 » position `p1->position'. */
2475 » trans = transitions + offs[p1->position];
2476 » while (trans->state != NULL)
2477 » {
2478 #if 0 2314 #if 0
2479 /* If we find a previous transition from `p1->position' to 2315 /* If we find a previous transition from `p1->position' to
2480 `p2->position', it is overwritten. This can happen only 2316 `p2->position', it is overwritten. This can happen only
2481 if there are nested loops in the regexp, like in "((a)*)*". 2317 if there are nested loops in the regexp, like in "((a)*)*".
2482 In POSIX.2 repetition using the outer loop is always 2318 In POSIX.2 repetition using the outer loop is always
2483 preferred over using the inner loop. Therefore the 2319 preferred over using the inner loop. Therefore the
2484 transition for the inner loop is useless and can be thrown 2320 transition for the inner loop is useless and can be thrown
2485 away. */ 2321 away. */
2486 /* XXX - The same position is used for all nodes in a bracket 2322 /* XXX - The same position is used for all nodes in a bracket
2487 expression, so this optimization cannot be used (it will 2323 expression, so this optimization cannot be used (it will
2488 break bracket expressions) unless I figure out a way to 2324 break bracket expressions) unless I figure out a way to
2489 detect it here. */ 2325 detect it here. */
2490 if (trans->state_id == p2->position) 2326 if (trans->state_id == p2->position)
2491 { 2327 {
2492 break; 2328 break;
2493 } 2329 }
2494 #endif 2330 #endif
2495 » » trans++; 2331 trans++;
2496 » } 2332 }
2497 2333
2498 » if (trans->state == NULL) 2334 if (trans->state == NULL)
2499 » (trans + 1)->state = NULL; 2335 (trans + 1)->state = NULL;
2500 » /* Use the character ranges, assertions, etc. from `p1' for 2336 /* Use the character ranges, assertions, etc. from `p1' for
2501 » the transition from `p1' to `p2'. */ 2337 the transition from `p1' to `p2'. */
2502 » trans->code_min = p1->code_min; 2338 trans->code_min = p1->code_min;
2503 » trans->code_max = p1->code_max; 2339 trans->code_max = p1->code_max;
2504 » trans->state = transitions + offs[p2->position]; 2340 trans->state = transitions + offs[p2->position];
2505 » trans->state_id = p2->position; 2341 trans->state_id = p2->position;
2506 » trans->assertions = p1->assertions | p2->assertions 2342 trans->assertions =
2507 » | (p1->class ? ASSERT_CHAR_CLASS : 0) 2343 p1->assertions | p2->assertions |
2508 » | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0); 2344 (p1->class ? ASSERT_CHAR_CLASS : 0) |
2509 » if (p1->backref >= 0) 2345 (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2510 » { 2346 if (p1->backref >= 0) {
2511 » » assert((trans->assertions & ASSERT_CHAR_CLASS) == 0); 2347 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2512 » » assert(p2->backref < 0); 2348 assert(p2->backref < 0);
2513 » » trans->u.backref = p1->backref; 2349 trans->u.backref = p1->backref;
2514 » » trans->assertions |= ASSERT_BACKREF; 2350 trans->assertions |= ASSERT_BACKREF;
2515 » } 2351 } else
2516 » else 2352 trans->u.class = p1->class;
2517 » trans->u.class = p1->class; 2353 if (p1->neg_classes != NULL) {
2518 » if (p1->neg_classes != NULL) 2354 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2519 » { 2355 ;
2520 » » for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++); 2356 trans->neg_classes = xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2521 » » trans->neg_classes = 2357 if (trans->neg_classes == NULL)
2522 » » xmalloc(sizeof(*trans->neg_classes) * (i + 1)); 2358 return REG_ESPACE;
2523 » » if (trans->neg_classes == NULL) 2359 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2524 » » return REG_ESPACE; 2360 trans->neg_classes[i] = p1->neg_classes[i];
2525 » » for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) 2361 trans->neg_classes[i] = (tre_ctype_t)0;
2526 » » trans->neg_classes[i] = p1->neg_classes[i]; 2362 } else
2527 » » trans->neg_classes[i] = (tre_ctype_t)0; 2363 trans->neg_classes = NULL;
2528 » }
2529 » else
2530 » trans->neg_classes = NULL;
2531 2364
2532 » /* Find out how many tags this transition has. */ 2365 /* Find out how many tags this transition has. */
2533 » i = 0; 2366 i = 0;
2534 » if (p1->tags != NULL) 2367 if (p1->tags != NULL)
2535 » while(p1->tags[i] >= 0) 2368 while (p1->tags[i] >= 0)
2536 » » i++; 2369 i++;
2537 » j = 0; 2370 j = 0;
2538 » if (p2->tags != NULL) 2371 if (p2->tags != NULL)
2539 » while(p2->tags[j] >= 0) 2372 while (p2->tags[j] >= 0)
2540 » » j++; 2373 j++;
2541 2374
2542 » /* If we are overwriting a transition, free the old tag array. */ 2375 /* If we are overwriting a transition, free the old tag array. */
2543 » if (trans->tags != NULL) 2376 if (trans->tags != NULL)
2544 » xfree(trans->tags); 2377 xfree(trans->tags);
2545 » trans->tags = NULL; 2378 trans->tags = NULL;
2546 2379
2547 » /* If there were any tags, allocate an array and fill it. */ 2380 /* If there were any tags, allocate an array and fill it. */
2548 » if (i + j > 0) 2381 if (i + j > 0) {
2549 » { 2382 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2550 » » trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1)); 2383 if (!trans->tags)
2551 » » if (!trans->tags) 2384 return REG_ESPACE;
2552 » » return REG_ESPACE; 2385 i = 0;
2553 » » i = 0; 2386 if (p1->tags != NULL)
2554 » » if (p1->tags != NULL) 2387 while (p1->tags[i] >= 0) {
2555 » » while(p1->tags[i] >= 0) 2388 trans->tags[i] = p1->tags[i];
2556 » » { 2389 i++;
2557 » » trans->tags[i] = p1->tags[i]; 2390 }
2558 » » i++; 2391 l = i;
2559 » » } 2392 j = 0;
2560 » » l = i; 2393 if (p2->tags != NULL)
2561 » » j = 0; 2394 while (p2->tags[j] >= 0) {
2562 » » if (p2->tags != NULL) 2395 /* Don't add duplicates. */
2563 » » while (p2->tags[j] >= 0) 2396 dup = 0;
2564 » » { 2397 for (k = 0; k < i; k++)
2565 » » /* Don't add duplicates. */ 2398 if (trans->tags[k] == p2->tags[j]) {
2566 » » dup = 0; 2399 dup = 1;
2567 » » for (k = 0; k < i; k++) 2400 break;
2568 » » » if (trans->tags[k] == p2->tags[j]) 2401 }
2569 » » » { 2402 if (!dup)
2570 » » » dup = 1; 2403 trans->tags[l++] = p2->tags[j];
2571 » » » break; 2404 j++;
2572 » » » } 2405 }
2573 » » if (!dup) 2406 trans->tags[l] = -1;
2574 » » » trans->tags[l++] = p2->tags[j]; 2407 }
2575 » » j++;
2576 » » }
2577 » » trans->tags[l] = -1;
2578 » }
2579 2408
2580 » p2++; 2409 p2++;
2581 » }
2582 » p1++;
2583 } 2410 }
2411 p1++;
2412 }
2584 else 2413 else
2585 /* Compute a maximum limit for the number of transitions leaving 2414 /* Compute a maximum limit for the number of transitions leaving
2586 from each state. */ 2415 from each state. */
2587 while (p1->position >= 0) 2416 while (p1->position >= 0) {
2588 { 2417 p2 = orig_p2;
2589 » p2 = orig_p2; 2418 while (p2->position >= 0) {
2590 » while (p2->position >= 0) 2419 counts[p1->position]++;
2591 » { 2420 p2++;
2592 » counts[p1->position]++;
2593 » p2++;
2594 » }
2595 » p1++;
2596 } 2421 }
2422 p1++;
2423 }
2597 return REG_OK; 2424 return REG_OK;
2598 } 2425 }
2599 2426
2600 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are 2427 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2601 labelled with one character range (there are no transitions on empty 2428 labelled with one character range (there are no transitions on empty
2602 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of 2429 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2603 the regexp. */ 2430 the regexp. */
2604 static reg_errcode_t 2431 static reg_errcode_t tre_ast_to_tnfa(tre_ast_node_t* node,
2605 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions, 2432 tre_tnfa_transition_t* transitions,
2606 » » int *counts, int *offs) 2433 int* counts,
2607 { 2434 int* offs) {
2608 tre_union_t *uni; 2435 tre_union_t* uni;
2609 tre_catenation_t *cat; 2436 tre_catenation_t* cat;
2610 tre_iteration_t *iter; 2437 tre_iteration_t* iter;
2611 reg_errcode_t errcode = REG_OK; 2438 reg_errcode_t errcode = REG_OK;
2612 2439
2613 /* XXX - recurse using a stack!. */ 2440 /* XXX - recurse using a stack!. */
2614 switch (node->type) 2441 switch (node->type) {
2615 {
2616 case LITERAL: 2442 case LITERAL:
2617 break; 2443 break;
2618 case UNION: 2444 case UNION:
2619 uni = (tre_union_t *)node->obj; 2445 uni = (tre_union_t*)node->obj;
2620 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs); 2446 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2621 if (errcode != REG_OK) 2447 if (errcode != REG_OK)
2622 » return errcode; 2448 return errcode;
2623 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs); 2449 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2624 break; 2450 break;
2625 2451
2626 case CATENATION: 2452 case CATENATION:
2627 cat = (tre_catenation_t *)node->obj; 2453 cat = (tre_catenation_t*)node->obj;
2628 /* Add a transition from each position in cat->left->lastpos 2454 /* Add a transition from each position in cat->left->lastpos
2629 » to each position in cat->right->firstpos. */ 2455 to each position in cat->right->firstpos. */
2630 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos, 2456 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2631 » » » transitions, counts, offs); 2457 transitions, counts, offs);
2632 if (errcode != REG_OK) 2458 if (errcode != REG_OK)
2633 » return errcode; 2459 return errcode;
2634 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs); 2460 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2635 if (errcode != REG_OK) 2461 if (errcode != REG_OK)
2636 » return errcode; 2462 return errcode;
2637 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs); 2463 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2638 break; 2464 break;
2639 2465
2640 case ITERATION: 2466 case ITERATION:
2641 iter = (tre_iteration_t *)node->obj; 2467 iter = (tre_iteration_t*)node->obj;
2642 assert(iter->max == -1 || iter->max == 1); 2468 assert(iter->max == -1 || iter->max == 1);
2643 2469
2644 if (iter->max == -1) 2470 if (iter->max == -1) {
2645 » { 2471 assert(iter->min == 0 || iter->min == 1);
2646 » assert(iter->min == 0 || iter->min == 1); 2472 /* Add a transition from each last position in the iterated
2647 » /* Add a transition from each last position in the iterated 2473 expression to each first position. */
2648 » expression to each first position. */ 2474 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2649 » errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos, 2475 transitions, counts, offs);
2650 » » » » transitions, counts, offs); 2476 if (errcode != REG_OK)
2651 » if (errcode != REG_OK) 2477 return errcode;
2652 » return errcode; 2478 }
2653 » }
2654 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs); 2479 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2655 break; 2480 break;
2656 } 2481 }
2657 return errcode; 2482 return errcode;
2658 } 2483 }
2659 2484
2485 #define ERROR_EXIT(err) \
2486 do { \
2487 errcode = err; \
2488 if (/*CONSTCOND*/ 1) \
2489 goto error_exit; \
2490 } while (/*CONSTCOND*/ 0)
2660 2491
2661 #define ERROR_EXIT(err)»» \ 2492 int regcomp(regex_t* restrict preg, const char* restrict regex, int cflags) {
2662 do» » » » \ 2493 tre_stack_t* stack;
2663 {» » » » \
2664 errcode = err;» » \
2665 if (/*CONSTCOND*/1)» \
2666 » goto error_exit;» \
2667 }» » » » \
2668 while (/*CONSTCOND*/0)
2669
2670
2671 int
2672 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2673 {
2674 tre_stack_t *stack;
2675 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; 2494 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2676 tre_pos_and_tags_t *p; 2495 tre_pos_and_tags_t* p;
2677 int *counts = NULL, *offs = NULL; 2496 int *counts = NULL, *offs = NULL;
2678 int i, add = 0; 2497 int i, add = 0;
2679 tre_tnfa_transition_t *transitions, *initial; 2498 tre_tnfa_transition_t *transitions, *initial;
2680 tre_tnfa_t *tnfa = NULL; 2499 tre_tnfa_t* tnfa = NULL;
2681 tre_submatch_data_t *submatch_data; 2500 tre_submatch_data_t* submatch_data;
2682 tre_tag_direction_t *tag_directions = NULL; 2501 tre_tag_direction_t* tag_directions = NULL;
2683 reg_errcode_t errcode; 2502 reg_errcode_t errcode;
2684 tre_mem_t mem; 2503 tre_mem_t mem;
2685 2504
2686 /* Parse context. */ 2505 /* Parse context. */
2687 tre_parse_ctx_t parse_ctx; 2506 tre_parse_ctx_t parse_ctx;
2688 2507
2689 /* Allocate a stack used throughout the compilation process for various 2508 /* Allocate a stack used throughout the compilation process for various
2690 purposes. */ 2509 purposes. */
2691 stack = tre_stack_new(512, 1024000, 128); 2510 stack = tre_stack_new(512, 1024000, 128);
2692 if (!stack) 2511 if (!stack)
2693 return REG_ESPACE; 2512 return REG_ESPACE;
2694 /* Allocate a fast memory allocator. */ 2513 /* Allocate a fast memory allocator. */
2695 mem = tre_mem_new(); 2514 mem = tre_mem_new();
2696 if (!mem) 2515 if (!mem) {
2697 { 2516 tre_stack_destroy(stack);
2698 tre_stack_destroy(stack); 2517 return REG_ESPACE;
2699 return REG_ESPACE; 2518 }
2700 }
2701 2519
2702 /* Parse the regexp. */ 2520 /* Parse the regexp. */
2703 memset(&parse_ctx, 0, sizeof(parse_ctx)); 2521 memset(&parse_ctx, 0, sizeof(parse_ctx));
2704 parse_ctx.mem = mem; 2522 parse_ctx.mem = mem;
2705 parse_ctx.stack = stack; 2523 parse_ctx.stack = stack;
2706 parse_ctx.re = regex; 2524 parse_ctx.re = regex;
2707 parse_ctx.cflags = cflags; 2525 parse_ctx.cflags = cflags;
2708 parse_ctx.max_backref = -1; 2526 parse_ctx.max_backref = -1;
2709 errcode = tre_parse(&parse_ctx); 2527 errcode = tre_parse(&parse_ctx);
2710 if (errcode != REG_OK) 2528 if (errcode != REG_OK)
(...skipping 12 matching lines...) Expand all
2723 /* Allocate the TNFA struct. */ 2541 /* Allocate the TNFA struct. */
2724 tnfa = xcalloc(1, sizeof(tre_tnfa_t)); 2542 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2725 if (tnfa == NULL) 2543 if (tnfa == NULL)
2726 ERROR_EXIT(REG_ESPACE); 2544 ERROR_EXIT(REG_ESPACE);
2727 tnfa->have_backrefs = parse_ctx.max_backref >= 0; 2545 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2728 tnfa->have_approx = 0; 2546 tnfa->have_approx = 0;
2729 tnfa->num_submatches = parse_ctx.submatch_id; 2547 tnfa->num_submatches = parse_ctx.submatch_id;
2730 2548
2731 /* Set up tags for submatch addressing. If REG_NOSUB is set and the 2549 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2732 regexp does not have back references, this can be skipped. */ 2550 regexp does not have back references, this can be skipped. */
2733 if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) 2551 if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) {
2734 { 2552 /* Figure out how many tags we will need. */
2553 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2554 if (errcode != REG_OK)
2555 ERROR_EXIT(errcode);
2735 2556
2736 /* Figure out how many tags we will need. */ 2557 if (tnfa->num_tags > 0) {
2737 errcode = tre_add_tags(NULL, stack, tree, tnfa); 2558 tag_directions = xmalloc(sizeof(*tag_directions) * (tnfa->num_tags + 1));
2738 if (errcode != REG_OK) 2559 if (tag_directions == NULL)
2739 » ERROR_EXIT(errcode); 2560 ERROR_EXIT(REG_ESPACE);
2561 tnfa->tag_directions = tag_directions;
2562 memset(tag_directions, -1,
2563 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2564 }
2565 tnfa->minimal_tags =
2566 xcalloc((unsigned)tnfa->num_tags * 2 + 1, sizeof(*tnfa->minimal_tags));
2567 if (tnfa->minimal_tags == NULL)
2568 ERROR_EXIT(REG_ESPACE);
2740 2569
2741 if (tnfa->num_tags > 0) 2570 submatch_data =
2742 » { 2571 xcalloc((unsigned)parse_ctx.submatch_id, sizeof(*submatch_data));
2743 » tag_directions = xmalloc(sizeof(*tag_directions) 2572 if (submatch_data == NULL)
2744 » » » » * (tnfa->num_tags + 1)); 2573 ERROR_EXIT(REG_ESPACE);
2745 » if (tag_directions == NULL) 2574 tnfa->submatch_data = submatch_data;
2746 » ERROR_EXIT(REG_ESPACE);
2747 » tnfa->tag_directions = tag_directions;
2748 » memset(tag_directions, -1,
2749 » » sizeof(*tag_directions) * (tnfa->num_tags + 1));
2750 » }
2751 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2752 » » » » sizeof(*tnfa->minimal_tags));
2753 if (tnfa->minimal_tags == NULL)
2754 » ERROR_EXIT(REG_ESPACE);
2755 2575
2756 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id, 2576 errcode = tre_add_tags(mem, stack, tree, tnfa);
2757 » » » sizeof(*submatch_data)); 2577 if (errcode != REG_OK)
2758 if (submatch_data == NULL) 2578 ERROR_EXIT(errcode);
2759 » ERROR_EXIT(REG_ESPACE); 2579 }
2760 tnfa->submatch_data = submatch_data;
2761
2762 errcode = tre_add_tags(mem, stack, tree, tnfa);
2763 if (errcode != REG_OK)
2764 » ERROR_EXIT(errcode);
2765
2766 }
2767 2580
2768 /* Expand iteration nodes. */ 2581 /* Expand iteration nodes. */
2769 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position, 2582 errcode =
2770 » » » tag_directions); 2583 tre_expand_ast(mem, stack, tree, &parse_ctx.position, tag_directions);
2771 if (errcode != REG_OK) 2584 if (errcode != REG_OK)
2772 ERROR_EXIT(errcode); 2585 ERROR_EXIT(errcode);
2773 2586
2774 /* Add a dummy node for the final state. 2587 /* Add a dummy node for the final state.
2775 XXX - For certain patterns this dummy node can be optimized away, 2588 XXX - For certain patterns this dummy node can be optimized away,
2776 » for example "a*" or "ab*".» Figure out a simple way to detect 2589 for example "a*" or "ab*".» Figure out a simple way to detect
2777 » this possibility. */ 2590 this possibility. */
2778 tmp_ast_l = tree; 2591 tmp_ast_l = tree;
2779 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++); 2592 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2780 if (tmp_ast_r == NULL) 2593 if (tmp_ast_r == NULL)
2781 ERROR_EXIT(REG_ESPACE); 2594 ERROR_EXIT(REG_ESPACE);
2782 2595
2783 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); 2596 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2784 if (tree == NULL) 2597 if (tree == NULL)
2785 ERROR_EXIT(REG_ESPACE); 2598 ERROR_EXIT(REG_ESPACE);
2786 2599
2787 errcode = tre_compute_nfl(mem, stack, tree); 2600 errcode = tre_compute_nfl(mem, stack, tree);
2788 if (errcode != REG_OK) 2601 if (errcode != REG_OK)
2789 ERROR_EXIT(errcode); 2602 ERROR_EXIT(errcode);
2790 2603
2791 counts = xmalloc(sizeof(int) * parse_ctx.position); 2604 counts = xmalloc(sizeof(int) * parse_ctx.position);
2792 if (counts == NULL) 2605 if (counts == NULL)
2793 ERROR_EXIT(REG_ESPACE); 2606 ERROR_EXIT(REG_ESPACE);
2794 2607
2795 offs = xmalloc(sizeof(int) * parse_ctx.position); 2608 offs = xmalloc(sizeof(int) * parse_ctx.position);
2796 if (offs == NULL) 2609 if (offs == NULL)
2797 ERROR_EXIT(REG_ESPACE); 2610 ERROR_EXIT(REG_ESPACE);
2798 2611
2799 for (i = 0; i < parse_ctx.position; i++) 2612 for (i = 0; i < parse_ctx.position; i++)
2800 counts[i] = 0; 2613 counts[i] = 0;
2801 tre_ast_to_tnfa(tree, NULL, counts, NULL); 2614 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2802 2615
2803 add = 0; 2616 add = 0;
2804 for (i = 0; i < parse_ctx.position; i++) 2617 for (i = 0; i < parse_ctx.position; i++) {
2805 { 2618 offs[i] = add;
2806 offs[i] = add; 2619 add += counts[i] + 1;
2807 add += counts[i] + 1; 2620 counts[i] = 0;
2808 counts[i] = 0; 2621 }
2809 }
2810 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions)); 2622 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2811 if (transitions == NULL) 2623 if (transitions == NULL)
2812 ERROR_EXIT(REG_ESPACE); 2624 ERROR_EXIT(REG_ESPACE);
2813 tnfa->transitions = transitions; 2625 tnfa->transitions = transitions;
2814 tnfa->num_transitions = add; 2626 tnfa->num_transitions = add;
2815 2627
2816 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs); 2628 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2817 if (errcode != REG_OK) 2629 if (errcode != REG_OK)
2818 ERROR_EXIT(errcode); 2630 ERROR_EXIT(errcode);
2819 2631
2820 tnfa->firstpos_chars = NULL; 2632 tnfa->firstpos_chars = NULL;
2821 2633
2822 p = tree->firstpos; 2634 p = tree->firstpos;
2823 i = 0; 2635 i = 0;
2824 while (p->position >= 0) 2636 while (p->position >= 0) {
2825 { 2637 i++;
2826 i++; 2638 p++;
2827 p++; 2639 }
2828 }
2829 2640
2830 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t)); 2641 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2831 if (initial == NULL) 2642 if (initial == NULL)
2832 ERROR_EXIT(REG_ESPACE); 2643 ERROR_EXIT(REG_ESPACE);
2833 tnfa->initial = initial; 2644 tnfa->initial = initial;
2834 2645
2835 i = 0; 2646 i = 0;
2836 for (p = tree->firstpos; p->position >= 0; p++) 2647 for (p = tree->firstpos; p->position >= 0; p++) {
2837 { 2648 initial[i].state = transitions + offs[p->position];
2838 initial[i].state = transitions + offs[p->position]; 2649 initial[i].state_id = p->position;
2839 initial[i].state_id = p->position; 2650 initial[i].tags = NULL;
2840 initial[i].tags = NULL; 2651 /* Copy the arrays p->tags, and p->params, they are allocated
2841 /* Copy the arrays p->tags, and p->params, they are allocated 2652 from a tre_mem object. */
2842 » from a tre_mem object. */ 2653 if (p->tags) {
2843 if (p->tags) 2654 int j;
2844 » { 2655 for (j = 0; p->tags[j] >= 0; j++)
2845 » int j; 2656 ;
2846 » for (j = 0; p->tags[j] >= 0; j++); 2657 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2847 » initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1)); 2658 if (!initial[i].tags)
2848 » if (!initial[i].tags) 2659 ERROR_EXIT(REG_ESPACE);
2849 » ERROR_EXIT(REG_ESPACE); 2660 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2850 » memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2851 » }
2852 initial[i].assertions = p->assertions;
2853 i++;
2854 } 2661 }
2662 initial[i].assertions = p->assertions;
2663 i++;
2664 }
2855 initial[i].state = NULL; 2665 initial[i].state = NULL;
2856 2666
2857 tnfa->num_transitions = add; 2667 tnfa->num_transitions = add;
2858 tnfa->final = transitions + offs[tree->lastpos[0].position]; 2668 tnfa->final = transitions + offs[tree->lastpos[0].position];
2859 tnfa->num_states = parse_ctx.position; 2669 tnfa->num_states = parse_ctx.position;
2860 tnfa->cflags = cflags; 2670 tnfa->cflags = cflags;
2861 2671
2862 tre_mem_destroy(mem); 2672 tre_mem_destroy(mem);
2863 tre_stack_destroy(stack); 2673 tre_stack_destroy(stack);
2864 xfree(counts); 2674 xfree(counts);
2865 xfree(offs); 2675 xfree(offs);
2866 2676
2867 preg->TRE_REGEX_T_FIELD = (void *)tnfa; 2677 preg->TRE_REGEX_T_FIELD = (void*)tnfa;
2868 return REG_OK; 2678 return REG_OK;
2869 2679
2870 error_exit: 2680 error_exit:
2871 /* Free everything that was allocated and return the error code. */ 2681 /* Free everything that was allocated and return the error code. */
2872 tre_mem_destroy(mem); 2682 tre_mem_destroy(mem);
2873 if (stack != NULL) 2683 if (stack != NULL)
2874 tre_stack_destroy(stack); 2684 tre_stack_destroy(stack);
2875 if (counts != NULL) 2685 if (counts != NULL)
2876 xfree(counts); 2686 xfree(counts);
2877 if (offs != NULL) 2687 if (offs != NULL)
2878 xfree(offs); 2688 xfree(offs);
2879 preg->TRE_REGEX_T_FIELD = (void *)tnfa; 2689 preg->TRE_REGEX_T_FIELD = (void*)tnfa;
2880 regfree(preg); 2690 regfree(preg);
2881 return errcode; 2691 return errcode;
2882 } 2692 }
2883 2693
2694 void regfree(regex_t* preg) {
2695 tre_tnfa_t* tnfa;
2696 unsigned int i;
2697 tre_tnfa_transition_t* trans;
2884 2698
2885 2699 tnfa = (void*)preg->TRE_REGEX_T_FIELD;
2886
2887 void
2888 regfree(regex_t *preg)
2889 {
2890 tre_tnfa_t *tnfa;
2891 unsigned int i;
2892 tre_tnfa_transition_t *trans;
2893
2894 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2895 if (!tnfa) 2700 if (!tnfa)
2896 return; 2701 return;
2897 2702
2898 for (i = 0; i < tnfa->num_transitions; i++) 2703 for (i = 0; i < tnfa->num_transitions; i++)
2899 if (tnfa->transitions[i].state) 2704 if (tnfa->transitions[i].state) {
2900 { 2705 if (tnfa->transitions[i].tags)
2901 » if (tnfa->transitions[i].tags) 2706 xfree(tnfa->transitions[i].tags);
2902 » xfree(tnfa->transitions[i].tags); 2707 if (tnfa->transitions[i].neg_classes)
2903 » if (tnfa->transitions[i].neg_classes) 2708 xfree(tnfa->transitions[i].neg_classes);
2904 » xfree(tnfa->transitions[i].neg_classes); 2709 }
2905 }
2906 if (tnfa->transitions) 2710 if (tnfa->transitions)
2907 xfree(tnfa->transitions); 2711 xfree(tnfa->transitions);
2908 2712
2909 if (tnfa->initial) 2713 if (tnfa->initial) {
2910 { 2714 for (trans = tnfa->initial; trans->state; trans++) {
2911 for (trans = tnfa->initial; trans->state; trans++) 2715 if (trans->tags)
2912 » { 2716 xfree(trans->tags);
2913 » if (trans->tags)
2914 » xfree(trans->tags);
2915 » }
2916 xfree(tnfa->initial);
2917 } 2717 }
2718 xfree(tnfa->initial);
2719 }
2918 2720
2919 if (tnfa->submatch_data) 2721 if (tnfa->submatch_data) {
2920 { 2722 for (i = 0; i < tnfa->num_submatches; i++)
2921 for (i = 0; i < tnfa->num_submatches; i++) 2723 if (tnfa->submatch_data[i].parents)
2922 » if (tnfa->submatch_data[i].parents) 2724 xfree(tnfa->submatch_data[i].parents);
2923 » xfree(tnfa->submatch_data[i].parents); 2725 xfree(tnfa->submatch_data);
2924 xfree(tnfa->submatch_data); 2726 }
2925 }
2926 2727
2927 if (tnfa->tag_directions) 2728 if (tnfa->tag_directions)
2928 xfree(tnfa->tag_directions); 2729 xfree(tnfa->tag_directions);
2929 if (tnfa->firstpos_chars) 2730 if (tnfa->firstpos_chars)
2930 xfree(tnfa->firstpos_chars); 2731 xfree(tnfa->firstpos_chars);
2931 if (tnfa->minimal_tags) 2732 if (tnfa->minimal_tags)
2932 xfree(tnfa->minimal_tags); 2733 xfree(tnfa->minimal_tags);
2933 xfree(tnfa); 2734 xfree(tnfa);
2934 } 2735 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698