fusl/src/regex/regcomp.c - Issue 1573973002: Add a "fork" of musl as //fusl.

Side by Side Diff: fusl/src/regex/regcomp.c

Issue 1573973002: Add a "fork" of musl as //fusl. (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 /*

	2 regcomp.c - TRE POSIX compatible regex compilation functions.

	3

	4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>

	5 All rights reserved.

	6

	7 Redistribution and use in source and binary forms, with or without

	8 modification, are permitted provided that the following conditions

	9 are met:

	10

	11 1. Redistributions of source code must retain the above copyright

	12 notice, this list of conditions and the following disclaimer.

	13

	14 2. Redistributions in binary form must reproduce the above copyright

	15 notice, this list of conditions and the following disclaimer in the

	16 documentation and/or other materials provided with the distribution.

	17

	18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS

	19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	29

	30 */

	31

	32 #include <string.h>

	33 #include <stdlib.h>

	34 #include <regex.h>

	35 #include <limits.h>

	36 #include <stdint.h>

	37 #include <ctype.h>

	38

	39 #include "tre.h"

	40

	41 #include <assert.h>

	42

	43 /***********************************************************************

	44 from tre-compile.h

	45 ***********************************************************************/

	46

	47 typedef struct {

	48 int position;

	49 int code_min;

	50 int code_max;

	51 int *tags;

	52 int assertions;

	53 tre_ctype_t class;

	54 tre_ctype_t *neg_classes;

	55 int backref;

	56 } tre_pos_and_tags_t;

	57

	58

	59 /***********************************************************************

	60 from tre-ast.c and tre-ast.h

	61 ***********************************************************************/

	62

	63 /* The different AST node types. */

	64 typedef enum {

	65 LITERAL,

	66 CATENATION,

	67 ITERATION,

	68 UNION

	69 } tre_ast_type_t;

	70

	71 /* Special subtypes of TRE_LITERAL. */

	72 #define EMPTY -1 /* Empty leaf (denotes empty string). */

	73 #define ASSERTION -2 /* Assertion leaf. */

	74 #define TAG -3 /* Tag leaf. */

	75 #define BACKREF -4 /* Back reference leaf. */

	76

	77 #define IS_SPECIAL(x) ((x)->code_min < 0)

	78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)

	79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)

	80 #define IS_TAG(x) ((x)->code_min == TAG)

	81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)

	82

	83

	84 /* A generic AST node. All AST nodes consist of this node on the top

	85 level with `obj' pointing to the actual content. */

	86 typedef struct {

	87 tre_ast_type_t type; /* Type of the node. */

	88 void obj; / Pointer to actual node. */

	89 int nullable;

	90 int submatch_id;

	91 int num_submatches;

	92 int num_tags;

	93 tre_pos_and_tags_t *firstpos;

	94 tre_pos_and_tags_t *lastpos;

	95 } tre_ast_node_t;

	96

	97

	98 /* A "literal" node. These are created for assertions, back references,

	99 tags, matching parameter settings, and all expressions that match one

	100 character. */

	101 typedef struct {

	102 long code_min;

	103 long code_max;

	104 int position;

	105 tre_ctype_t class;

	106 tre_ctype_t *neg_classes;

	107 } tre_literal_t;

	108

	109 /* A "catenation" node. These are created when two regexps are concatenated.

	110 If there are more than one subexpressions in sequence, the `left' part

	111 holds all but the last, and `right' part holds the last subexpression

	112 (catenation is left associative). */

	113 typedef struct {

	114 tre_ast_node_t *left;

	115 tre_ast_node_t *right;

	116 } tre_catenation_t;

	117

	118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"

	119 operators. */

	120 typedef struct {

	121 /* Subexpression to match. */

	122 tre_ast_node_t *arg;

	123 /* Minimum number of consecutive matches. */

	124 int min;

	125 /* Maximum number of consecutive matches. */

	126 int max;

	127 /* If 0, match as many characters as possible, if 1 match as few as

	128 possible. Note that this does not always mean the same thing as

	129 matching as many/few repetitions as possible. */

	130 unsigned int minimal:1;

	131 } tre_iteration_t;

	132

	133 /* An "union" node. These are created for the "\|" operator. */

	134 typedef struct {

	135 tre_ast_node_t *left;

	136 tre_ast_node_t *right;

	137 } tre_union_t;

	138

	139

	140 static tre_ast_node_t *

	141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)

	142 {

	143 tre_ast_node_t node = tre_mem_calloc(mem, sizeof node);

	144 if (!node \|\| !obj)

	145 return 0;

	146 node->obj = obj;

	147 node->type = type;

	148 node->nullable = -1;

	149 node->submatch_id = -1;

	150 return node;

	151 }

	152

	153 static tre_ast_node_t *

	154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)

	155 {

	156 tre_ast_node_t *node;

	157 tre_literal_t *lit;

	158

	159 lit = tre_mem_calloc(mem, sizeof *lit);

	160 node = tre_ast_new_node(mem, LITERAL, lit);

	161 if (!node)

	162 return 0;

	163 lit->code_min = code_min;

	164 lit->code_max = code_max;

	165 lit->position = position;

	166 return node;

	167 }

	168

	169 static tre_ast_node_t *

	170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minim al)

	171 {

	172 tre_ast_node_t *node;

	173 tre_iteration_t *iter;

	174

	175 iter = tre_mem_calloc(mem, sizeof *iter);

	176 node = tre_ast_new_node(mem, ITERATION, iter);

	177 if (!node)

	178 return 0;

	179 iter->arg = arg;

	180 iter->min = min;

	181 iter->max = max;

	182 iter->minimal = minimal;

	183 node->num_submatches = arg->num_submatches;

	184 return node;

	185 }

	186

	187 static tre_ast_node_t *

	188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t right)

	189 {

	190 tre_ast_node_t *node;

	191 tre_union_t *un;

	192

	193 if (!left)

	194 return right;

	195 un = tre_mem_calloc(mem, sizeof *un);

	196 node = tre_ast_new_node(mem, UNION, un);

	197 if (!node \|\| !right)

	198 return 0;

	199 un->left = left;

	200 un->right = right;

	201 node->num_submatches = left->num_submatches + right->num_submatches;

	202 return node;

	203 }

	204

	205 static tre_ast_node_t *

	206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t righ t)

	207 {

	208 tre_ast_node_t *node;

	209 tre_catenation_t *cat;

	210

	211 if (!left)

	212 return right;

	213 cat = tre_mem_calloc(mem, sizeof *cat);

	214 node = tre_ast_new_node(mem, CATENATION, cat);

	215 if (!node)

	216 return 0;

	217 cat->left = left;

	218 cat->right = right;

	219 node->num_submatches = left->num_submatches + right->num_submatches;

	220 return node;

	221 }

	222

	223

	224 /***********************************************************************

	225 from tre-stack.c and tre-stack.h

	226 ***********************************************************************/

	227

	228 typedef struct tre_stack_rec tre_stack_t;

	229

	230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'

	231 is maximum size, and `increment' specifies how much more space will be

	232 allocated with realloc() if all space gets used up. Returns the stack

	233 object or NULL if out of memory. */

	234 static tre_stack_t *

	235 tre_stack_new(int size, int max_size, int increment);

	236

	237 /* Frees the stack object. */

	238 static void

	239 tre_stack_destroy(tre_stack_t *s);

	240

	241 /* Returns the current number of objects in the stack. */

	242 static int

	243 tre_stack_num_objects(tre_stack_t *s);

	244

	245 /* Each tre_stack_push_(tre_stack_t s, <type> value) function pushes

	246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.

	247 This tries to realloc() more space before failing if maximum size

	248 has not yet been reached. Returns REG_OK if successful. */

	249 #define declare_pushf(typetag, type) \

	250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)

	251

	252 declare_pushf(voidptr, void *);

	253 declare_pushf(int, int);

	254

	255 /* Each tre_stack_pop_(tre_stack_t s) function pops the topmost

	256 element off of stack `s' and returns it. The stack must not be

	257 empty. */

	258 #define declare_popf(typetag, type) \

	259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)

	260

	261 declare_popf(voidptr, void *);

	262 declare_popf(int, int);

	263

	264 /* Just to save some typing. */

	265 #define STACK_PUSH(s, typetag, value) \

	266 do \

	267 { \

	268 status = tre_stack_push_ ## typetag(s, value); \

	269 } \

	270 while (/CONSTCOND/0)

	271

	272 #define STACK_PUSHX(s, typetag, value) \

	273 { \

	274 status = tre_stack_push_ ## typetag(s, value); \

	275 if (status != REG_OK) \

	276 break; \

	277 }

	278

	279 #define STACK_PUSHR(s, typetag, value) \

	280 { \

	281 reg_errcode_t _status; \

	282 _status = tre_stack_push_ ## typetag(s, value); \

	283 if (_status != REG_OK) \

	284 return _status; \

	285 }

	286

	287 union tre_stack_item {

	288 void *voidptr_value;

	289 int int_value;

	290 };

	291

	292 struct tre_stack_rec {

	293 int size;

	294 int max_size;

	295 int increment;

	296 int ptr;

	297 union tre_stack_item *stack;

	298 };

	299

	300

	301 static tre_stack_t *

	302 tre_stack_new(int size, int max_size, int increment)

	303 {

	304 tre_stack_t *s;

	305

	306 s = xmalloc(sizeof(*s));

	307 if (s != NULL)

	308 {

	309 s->stack = xmalloc(sizeof(s->stack) size);

	310 if (s->stack == NULL)

	311 {

	312 xfree(s);

	313 return NULL;

	314 }

	315 s->size = size;

	316 s->max_size = max_size;

	317 s->increment = increment;

	318 s->ptr = 0;

	319 }

	320 return s;

	321 }

	322

	323 static void

	324 tre_stack_destroy(tre_stack_t *s)

	325 {

	326 xfree(s->stack);

	327 xfree(s);

	328 }

	329

	330 static int

	331 tre_stack_num_objects(tre_stack_t *s)

	332 {

	333 return s->ptr;

	334 }

	335

	336 static reg_errcode_t

	337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)

	338 {

	339 if (s->ptr < s->size)

	340 {

	341 s->stack[s->ptr] = value;

	342 s->ptr++;

	343 }

	344 else

	345 {

	346 if (s->size >= s->max_size)

	347 {

	348 return REG_ESPACE;

	349 }

	350 else

	351 {

	352 union tre_stack_item *new_buffer;

	353 int new_size;

	354 new_size = s->size + s->increment;

	355 if (new_size > s->max_size)

	356 new_size = s->max_size;

	357 new_buffer = xrealloc(s->stack, sizeof(new_buffer) new_size);

	358 if (new_buffer == NULL)

	359 {

	360 return REG_ESPACE;

	361 }

	362 assert(new_size > s->size);

	363 s->size = new_size;

	364 s->stack = new_buffer;

	365 tre_stack_push(s, value);

	366 }

	367 }

	368 return REG_OK;

	369 }

	370

	371 #define define_pushf(typetag, type) \

	372 declare_pushf(typetag, type) { \

	373 union tre_stack_item item; \

	374 item.typetag ## _value = value; \

	375 return tre_stack_push(s, item); \

	376 }

	377

	378 define_pushf(int, int)

	379 define_pushf(voidptr, void *)

	380

	381 #define define_popf(typetag, type) \

	382 declare_popf(typetag, type) { \

	383 return s->stack[--s->ptr].typetag ## _value; \

	384 }

	385

	386 define_popf(int, int)

	387 define_popf(voidptr, void *)

	388

	389

	390 /***********************************************************************

	391 from tre-parse.c and tre-parse.h

	392 ***********************************************************************/

	393

	394 /* Parse context. */

	395 typedef struct {

	396 /* Memory allocator. The AST is allocated using this. */

	397 tre_mem_t mem;

	398 /* Stack used for keeping track of regexp syntax. */

	399 tre_stack_t *stack;

	400 /* The parsed node after a parse function returns. */

	401 tre_ast_node_t *n;

	402 /* Position in the regexp pattern after a parse function returns. */

	403 const char *s;

	404 /* The first character of the regexp. */

	405 const char *re;

	406 /* Current submatch ID. */

	407 int submatch_id;

	408 /* Current position (number of literal). */

	409 int position;

	410 /* The highest back reference or -1 if none seen so far. */

	411 int max_backref;

	412 /* Compilation flags. */

	413 int cflags;

	414 } tre_parse_ctx_t;

	415

	416 /* Some macros for expanding \w, \s, etc. */

	417 static const struct {

	418 char c;

	419 const char *expansion;

	420 } tre_macros[] = {

	421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},

	422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},

	423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},

	424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},

	425 { 0, 0 }

	426 };

	427

	428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which

	429 must have at least `len' items. Sets buf[0] to zero if the there

	430 is no match in `tre_macros'. */

	431 static const char tre_expand_macro(const char s)

	432 {

	433 int i;

	434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);

	435 return tre_macros[i].expansion;

	436 }

	437

	438 static int

	439 tre_compare_lit(const void a, const void b)

	440 {

	441 const tre_literal_t const la = a;

	442 const tre_literal_t const lb = b;

	443 /* assumes the range of valid code_min is < INT_MAX */

	444 return la[0]->code_min - lb[0]->code_min;

	445 }

	446

	447 struct literals {

	448 tre_mem_t mem;

	449 tre_literal_t **a;

	450 int len;

	451 int cap;

	452 };

	453

	454 static tre_literal_t tre_new_lit(struct literals p)

	455 {

	456 tre_literal_t **a;

	457 if (p->len >= p->cap) {

	458 if (p->cap >= 1<<15)

	459 return 0;

	460 p->cap *= 2;

	461 a = xrealloc(p->a, p->cap * sizeof *p->a);

	462 if (!a)

	463 return 0;

	464 p->a = a;

	465 }

	466 a = p->a + p->len++;

	467 a = tre_mem_calloc(p->mem, sizeof *a);

	468 return *a;

	469 }

	470

	471 static int add_icase_literals(struct literals *ls, int min, int max)

	472 {

	473 tre_literal_t *lit;

	474 int b, e, c;

	475 for (c=min; c<=max; ) {

	476 /* assumes islower(c) and isupper(c) are exclusive

	477 and toupper(c)!=c if islower(c).

	478 multiple opposite case characters are not supported */

	479 if (tre_islower(c)) {

	480 b = e = tre_toupper(c);

	481 for (c++, e++; c<=max; c++, e++)

	482 if (tre_toupper(c) != e) break;

	483 } else if (tre_isupper(c)) {

	484 b = e = tre_tolower(c);

	485 for (c++, e++; c<=max; c++, e++)

	486 if (tre_tolower(c) != e) break;

	487 } else {

	488 c++;

	489 continue;

	490 }

	491 lit = tre_new_lit(ls);

	492 if (!lit)

	493 return -1;

	494 lit->code_min = b;

	495 lit->code_max = e-1;

	496 lit->position = -1;

	497 }

	498 return 0;

	499 }

	500

	501

	502 /* Maximum number of character classes in a negated bracket expression. */

	503 #define MAX_NEG_CLASSES 64

	504

	505 struct neg {

	506 int negate;

	507 int len;

	508 tre_ctype_t a[MAX_NEG_CLASSES];

	509 };

	510

	511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges

	512

	513 /*

	514 bracket grammar:

	515 Bracket = '[' List ']' \| '[^' List ']'

	516 List = Term \| List Term

	517 Term = Char \| Range \| Chclass \| Eqclass

	518 Range = Char '-' Char \| Char '-' '-'

	519 Char = Coll \| coll_single

	520 Meta = ']' \| '-'

	521 Coll = '[.' coll_single '.]' \| '[.' coll_multi '.]' \| '[.' Meta '.]'

	522 Eqclass = '[=' coll_single '=]' \| '[=' coll_multi '=]'

	523 Chclass = '[:' class ':]'

	524

	525 coll_single is a single char collating element but it can be

	526 '-' only at the beginning or end of a List and

	527 ']' only at the beginning of a List and

	528 '^' anywhere except after the openning '['

	529 */

	530

	531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t ctx, const char s, st ruct literals ls, struct neg neg)

	532 {

	533 const char *start = s;

	534 tre_ctype_t class;

	535 int min, max;

	536 wchar_t wc;

	537 int len;

	538

	539 for (;;) {

	540 class = 0;

	541 len = mbtowc(&wc, s, -1);

	542 if (len <= 0)

	543 return *s ? REG_BADPAT : REG_EBRACK;

	544 if (*s == ']' && s != start) {

	545 ctx->s = s+1;

	546 return REG_OK;

	547 }

	548 if (*s == '-' && s != start && s[1] != ']' &&

	549 /* extension: [a-z--@] is accepted as [a-z]\|[--@] */

	550 (s[1] != '-' \|\| s[2] == ']'))

	551 return REG_ERANGE;

	552 if (*s == '[' && (s[1] == '.' \|\| s[1] == '='))

	553 /* collating symbols and equivalence classes are not sup ported */

	554 return REG_ECOLLATE;

	555 if (*s == '[' && s[1] == ':') {

	556 char tmp[CHARCLASS_NAME_MAX+1];

	557 s += 2;

	558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {

	559 if (s[len] == ':') {

	560 memcpy(tmp, s, len);

	561 tmp[len] = 0;

	562 class = tre_ctype(tmp);

	563 break;

	564 }

	565 }

	566 if (!class \|\| s[len+1] != ']')

	567 return REG_ECTYPE;

	568 min = 0;

	569 max = TRE_CHAR_MAX;

	570 s += len+2;

	571 } else {

	572 min = max = wc;

	573 s += len;

	574 if (*s == '-' && s[1] != ']') {

	575 s++;

	576 len = mbtowc(&wc, s, -1);

	577 max = wc;

	578 /* XXX - Should use collation order instead of

	579 encoding values in character ranges. */

	580 if (len <= 0 \|\| min > max)

	581 return REG_ERANGE;

	582 s += len;

	583 }

	584 }

	585

	586 if (class && neg->negate) {

	587 if (neg->len >= MAX_NEG_CLASSES)

	588 return REG_ESPACE;

	589 neg->a[neg->len++] = class;

	590 } else {

	591 tre_literal_t *lit = tre_new_lit(ls);

	592 if (!lit)

	593 return REG_ESPACE;

	594 lit->code_min = min;

	595 lit->code_max = max;

	596 lit->class = class;

	597 lit->position = -1;

	598

	599 /* Add opposite-case codepoints if REG_ICASE is present.

	600 It seems that POSIX requires that bracket negation

	601 should happen before case-folding, but most practical

	602 implementations do it the other way around. Changing

	603 the order would need efficient representation of

	604 case-fold ranges and bracket range sets even with

	605 simple patterns so this is ok for now. */

	606 if (ctx->cflags & REG_ICASE && !class)

	607 if (add_icase_literals(ls, min, max))

	608 return REG_ESPACE;

	609 }

	610 }

	611 }

	612

	613 static reg_errcode_t parse_bracket(tre_parse_ctx_t ctx, const char s)

	614 {

	615 int i, max, min, negmax, negmin;

	616 tre_ast_node_t node = 0, n;

	617 tre_ctype_t *nc = 0;

	618 tre_literal_t *lit;

	619 struct literals ls;

	620 struct neg neg;

	621 reg_errcode_t err;

	622

	623 ls.mem = ctx->mem;

	624 ls.len = 0;

	625 ls.cap = 32;

	626 ls.a = xmalloc(ls.cap * sizeof *ls.a);

	627 if (!ls.a)

	628 return REG_ESPACE;

	629 neg.len = 0;

	630 neg.negate = *s == '^';

	631 if (neg.negate)

	632 s++;

	633

	634 err = parse_bracket_terms(ctx, s, &ls, &neg);

	635 if (err != REG_OK)

	636 goto parse_bracket_done;

	637

	638 if (neg.negate) {

	639 /* Sort the array if we need to negate it. */

	640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);

	641 /* extra lit for the last negated range */

	642 lit = tre_new_lit(&ls);

	643 if (!lit) {

	644 err = REG_ESPACE;

	645 goto parse_bracket_done;

	646 }

	647 lit->code_min = TRE_CHAR_MAX+1;

	648 lit->code_max = TRE_CHAR_MAX+1;

	649 lit->position = -1;

	650 /* negated classes */

	651 if (neg.len) {

	652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)sizeof neg.a);

	653 if (!nc) {

	654 err = REG_ESPACE;

	655 goto parse_bracket_done;

	656 }

	657 memcpy(nc, neg.a, neg.lensizeof neg.a);

	658 nc[neg.len] = 0;

	659 }

	660 }

	661

	662 /* Build a union of the items in the array, negated if necessary. */

	663 negmax = negmin = 0;

	664 for (i = 0; i < ls.len; i++) {

	665 lit = ls.a[i];

	666 min = lit->code_min;

	667 max = lit->code_max;

	668 if (neg.negate) {

	669 if (min <= negmin) {

	670 /* Overlap. */

	671 negmin = MAX(max + 1, negmin);

	672 continue;

	673 }

	674 negmax = min - 1;

	675 lit->code_min = negmin;

	676 lit->code_max = negmax;

	677 negmin = max + 1;

	678 }

	679 lit->position = ctx->position;

	680 lit->neg_classes = nc;

	681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);

	682 node = tre_ast_new_union(ctx->mem, node, n);

	683 if (!node) {

	684 err = REG_ESPACE;

	685 break;

	686 }

	687 }

	688

	689 parse_bracket_done:

	690 xfree(ls.a);

	691 ctx->position++;

	692 ctx->n = node;

	693 return err;

	694 }

	695

	696 static const char parse_dup_count(const char s, int *n)

	697 {

	698 *n = -1;

	699 if (!isdigit(*s))

	700 return s;

	701 *n = 0;

	702 for (;;) {

	703 n = 10 n + (s - '0');

	704 s++;

	705 if (!isdigit(s) \|\| n > RE_DUP_MAX)

	706 break;

	707 }

	708 return s;

	709 }

	710

	711 static reg_errcode_t parse_dup(tre_parse_ctx_t ctx, const char s)

	712 {

	713 int min, max;

	714

	715 s = parse_dup_count(s, &min);

	716 if (*s == ',')

	717 s = parse_dup_count(s+1, &max);

	718 else

	719 max = min;

	720

	721 if (

	722 (max < min && max >= 0) \|\|

	723 max > RE_DUP_MAX \|\|

	724 min > RE_DUP_MAX \|\|

	725 min < 0 \|\|

	726 (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') \|\|

	727 *s++ != '}'

	728 )

	729 return REG_BADBR;

	730

	731 if (min == 0 && max == 0)

	732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

	733 else

	734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);

	735 if (!ctx->n)

	736 return REG_ESPACE;

	737 ctx->s = s;

	738 return REG_OK;

	739 }

	740

	741 static int hexval(unsigned c)

	742 {

	743 if (c-'0'<10) return c-'0';

	744 c \|= 32;

	745 if (c-'a'<6) return c-'a'+10;

	746 return -1;

	747 }

	748

	749 static reg_errcode_t marksub(tre_parse_ctx_t ctx, tre_ast_node_t node, int sub id)

	750 {

	751 if (node->submatch_id >= 0) {

	752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1) ;

	753 if (!n)

	754 return REG_ESPACE;

	755 n = tre_ast_new_catenation(ctx->mem, n, node);

	756 if (!n)

	757 return REG_ESPACE;

	758 n->num_submatches = node->num_submatches;

	759 node = n;

	760 }

	761 node->submatch_id = subid;

	762 node->num_submatches++;

	763 ctx->n = node;

	764 return REG_OK;

	765 }

	766

	767 /*

	768 BRE grammar:

	769 Regex = Branch \| '^' \| '$' \| '^$' \| '^' Branch \| Branch '$' \| '^' Branch '$'

	770 Branch = Atom \| Branch Atom

	771 Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '$' Branch '$' \| back_ref

	772 Dup = '*' \| '\{' Count '\}' \| '\{' Count ',\}' \| '\{' Count ',' Count '\}'

	773

	774 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)

	775

	776 ERE grammar:

	777 Regex = Branch \| Regex '\|' Branch

	778 Branch = Atom \| Branch Atom

	779 Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '(' Regex ') ' \| '^' \| '$'

	780 Dup = '*' \| '+' \| '?' \| '{' Count '}' \| '{' Count ',}' \| '{' Count ',' Count '}'

	781

	782 (a+?, ^, $+, \X, {, (\|a) are unspecified)

	783 */

	784

	785 static reg_errcode_t parse_atom(tre_parse_ctx_t ctx, const char s)

	786 {

	787 int len, ere = ctx->cflags & REG_EXTENDED;

	788 const char *p;

	789 tre_ast_node_t *node;

	790 wchar_t wc;

	791 switch (*s) {

	792 case '[':

	793 return parse_bracket(ctx, s+1);

	794 case '\\':

	795 p = tre_expand_macro(s+1);

	796 if (p) {

	797 /* assume \X expansion is a single atom */

	798 reg_errcode_t err = parse_atom(ctx, p);

	799 ctx->s = s+2;

	800 return err;

	801 }

	802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */

	803 switch (*++s) {

	804 case 0:

	805 return REG_EESCAPE;

	806 case 'b':

	807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_WB, -1);

	808 break;

	809 case 'B':

	810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_WB_NEG, -1);

	811 break;

	812 case '<':

	813 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_BOW, -1);

	814 break;

	815 case '>':

	816 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_EOW, -1);

	817 break;

	818 case 'x':

	819 s++;

	820 int i, v = 0, c;

	821 len = 2;

	822 if (*s == '{') {

	823 len = 8;

	824 s++;

	825 }

	826 for (i=0; i<len && v<0x110000; i++) {

	827 c = hexval(s[i]);

	828 if (c < 0) break;

	829 v = 16*v + c;

	830 }

	831 s += i;

	832 if (len == 8) {

	833 if (*s != '}')

	834 return REG_EBRACE;

	835 s++;

	836 }

	837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position );

	838 ctx->position++;

	839 s--;

	840 break;

	841 default:

	842 if (!ere && (unsigned)*s-'1' < 9) {

	843 /* back reference */

	844 int val = *s - '0';

	845 node = tre_ast_new_literal(ctx->mem, BACKREF, va l, ctx->position);

	846 ctx->max_backref = MAX(val, ctx->max_backref);

	847 } else {

	848 /* extension: accept unknown escaped char

	849 as a literal */

	850 goto parse_literal;

	851 }

	852 ctx->position++;

	853 }

	854 s++;

	855 break;

	856 case '.':

	857 if (ctx->cflags & REG_NEWLINE) {

	858 tre_ast_node_t tmp1, tmp2;

	859 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos ition++);

	860 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA X, ctx->position++);

	861 if (tmp1 && tmp2)

	862 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);

	863 else

	864 node = 0;

	865 } else {

	866 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ct x->position++);

	867 }

	868 s++;

	869 break;

	870 case '^':

	871 /* '^' has a special meaning everywhere in EREs, and at beginnin g of BRE. */

	872 if (!ere && s != ctx->re)

	873 goto parse_literal;

	874 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, - 1);

	875 s++;

	876 break;

	877 case '$':

	878 /* '$' is special everywhere in EREs, and in the end of the stri ng in BREs. */

	879 if (!ere && s[1])

	880 goto parse_literal;

	881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, - 1);

	882 s++;

	883 break;

	884 case '*':

	885 case '\|':

	886 case '{':

	887 case '+':

	888 case '?':

	889 if (!ere)

	890 goto parse_literal;

	891 case 0:

	892 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

	893 break;

	894 default:

	895 parse_literal:

	896 len = mbtowc(&wc, s, -1);

	897 if (len < 0)

	898 return REG_BADPAT;

	899 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) \|\| tre_islower(w c))) {

	900 tre_ast_node_t tmp1, tmp2;

	901 /* multiple opposite case characters are not supported * /

	902 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr e_toupper(wc), ctx->position);

	903 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr e_tolower(wc), ctx->position);

	904 if (tmp1 && tmp2)

	905 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);

	906 else

	907 node = 0;

	908 } else {

	909 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi on);

	910 }

	911 ctx->position++;

	912 s += len;

	913 break;

	914 }

	915 if (!node)

	916 return REG_ESPACE;

	917 ctx->n = node;

	918 ctx->s = s;

	919 return REG_OK;

	920 }

	921

	922 #define PUSHPTR(err, s, v) do { \

	923 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \

	924 return err; \

	925 } while(0)

	926

	927 #define PUSHINT(err, s, v) do { \

	928 if ((err = tre_stack_push_int(s, v)) != REG_OK) \

	929 return err; \

	930 } while(0)

	931

	932 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)

	933 {

	934 tre_ast_node_t nbranch=0, nunion=0;

	935 int ere = ctx->cflags & REG_EXTENDED;

	936 const char *s = ctx->re;

	937 int subid = 0;

	938 int depth = 0;

	939 reg_errcode_t err;

	940 tre_stack_t *stack = ctx->stack;

	941

	942 PUSHINT(err, stack, subid++);

	943 for (;;) {

	944 if ((!ere && *s == '\\' && s[1] == '(') \|\|

	945 (ere && *s == '(')) {

	946 PUSHPTR(err, stack, nunion);

	947 PUSHPTR(err, stack, nbranch);

	948 PUSHINT(err, stack, subid++);

	949 s++;

	950 if (!ere)

	951 s++;

	952 depth++;

	953 nbranch = nunion = 0;

	954 continue;

	955 }

	956 if ((!ere && *s == '\\' && s[1] == ')') \|\|

	957 (ere && *s == ')' && depth)) {

	958 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

	959 if (!ctx->n)

	960 return REG_ESPACE;

	961 } else {

	962 err = parse_atom(ctx, s);

	963 if (err != REG_OK)

	964 return err;

	965 s = ctx->s;

	966 }

	967

	968 parse_iter:

	969 /* extension: repetitions are accepted after an empty node

	970 eg. (+), ^, a$?, a\|{2} /

	971 switch (*s) {

	972 case '+':

	973 case '?':

	974 if (!ere)

	975 break;

	976 /* fallthrough */

	977 case '*':;

	978 int min=0, max=-1;

	979 if (*s == '+')

	980 min = 1;

	981 if (*s == '?')

	982 max = 1;

	983 s++;

	984 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0) ;

	985 if (!ctx->n)

	986 return REG_ESPACE;

	987 /* extension: multiple consecutive *+?{,} is unspecified ,

	988 but (a+)+ has to be supported so accepting a++ makes

	989 sense, note however that the RE_DUP_MAX limit can be

	990 circumvented: (a{255}){255} uses a lot of memory.. */

	991 goto parse_iter;

	992 case '\\':

	993 if (ere \|\| s[1] != '{')

	994 break;

	995 s++;

	996 goto parse_brace;

	997 case '{':

	998 if (!ere)

	999 break;

	1000 parse_brace:

	1001 err = parse_dup(ctx, s+1);

	1002 if (err != REG_OK)

	1003 return err;

	1004 s = ctx->s;

	1005 goto parse_iter;

	1006 }

	1007

	1008 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);

	1009 if ((ere && *s == '\|') \|\|

	1010 (ere && *s == ')' && depth) \|\|

	1011 (!ere && *s == '\\' && s[1] == ')') \|\|

	1012 !*s) {

	1013 /* extension: empty branch is unspecified (), (\|a), (a\|)

	1014 here they are not rejected but match on empty string */

	1015 int c = *s;

	1016 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);

	1017 nbranch = 0;

	1018 if (c != '\|') {

	1019 if (c == '\\') {

	1020 if (!depth) return REG_EPAREN;

	1021 s+=2;

	1022 } else if (c == ')')

	1023 s++;

	1024 depth--;

	1025 err = marksub(ctx, nunion, tre_stack_pop_int(sta ck));

	1026 if (err != REG_OK)

	1027 return err;

	1028 if (!c && depth<0) {

	1029 ctx->submatch_id = subid;

	1030 return REG_OK;

	1031 }

	1032 if (!c \|\| depth<0)

	1033 return REG_EPAREN;

	1034 nbranch = tre_stack_pop_voidptr(stack);

	1035 nunion = tre_stack_pop_voidptr(stack);

	1036 goto parse_iter;

	1037 }

	1038 s++;

	1039 }

	1040 }

	1041 }

	1042

	1043

	1044 /***********************************************************************

	1045 from tre-compile.c

	1046 ***********************************************************************/

	1047

	1048

	1049 /*

	1050 TODO:

	1051 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive

	1052 function calls.

	1053 */

	1054

	1055 /*

	1056 Algorithms to setup tags so that submatch addressing can be done.

	1057 */

	1058

	1059

	1060 /* Inserts a catenation node to the root of the tree given in `node'.

	1061 As the left child a new tag with number `tag_id' to `node' is added,

	1062 and the right child is the old root. */

	1063 static reg_errcode_t

	1064 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)

	1065 {

	1066 tre_catenation_t *c;

	1067

	1068 c = tre_mem_alloc(mem, sizeof(*c));

	1069 if (c == NULL)

	1070 return REG_ESPACE;

	1071 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);

	1072 if (c->left == NULL)

	1073 return REG_ESPACE;

	1074 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));

	1075 if (c->right == NULL)

	1076 return REG_ESPACE;

	1077

	1078 c->right->obj = node->obj;

	1079 c->right->type = node->type;

	1080 c->right->nullable = -1;

	1081 c->right->submatch_id = -1;

	1082 c->right->firstpos = NULL;

	1083 c->right->lastpos = NULL;

	1084 c->right->num_tags = 0;

	1085 node->obj = c;

	1086 node->type = CATENATION;

	1087 return REG_OK;

	1088 }

	1089

	1090 /* Inserts a catenation node to the root of the tree given in `node'.

	1091 As the right child a new tag with number `tag_id' to `node' is added,

	1092 and the left child is the old root. */

	1093 static reg_errcode_t

	1094 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)

	1095 {

	1096 tre_catenation_t *c;

	1097

	1098 c = tre_mem_alloc(mem, sizeof(*c));

	1099 if (c == NULL)

	1100 return REG_ESPACE;

	1101 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);

	1102 if (c->right == NULL)

	1103 return REG_ESPACE;

	1104 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));

	1105 if (c->left == NULL)

	1106 return REG_ESPACE;

	1107

	1108 c->left->obj = node->obj;

	1109 c->left->type = node->type;

	1110 c->left->nullable = -1;

	1111 c->left->submatch_id = -1;

	1112 c->left->firstpos = NULL;

	1113 c->left->lastpos = NULL;

	1114 c->left->num_tags = 0;

	1115 node->obj = c;

	1116 node->type = CATENATION;

	1117 return REG_OK;

	1118 }

	1119

	1120 typedef enum {

	1121 ADDTAGS_RECURSE,

	1122 ADDTAGS_AFTER_ITERATION,

	1123 ADDTAGS_AFTER_UNION_LEFT,

	1124 ADDTAGS_AFTER_UNION_RIGHT,

	1125 ADDTAGS_AFTER_CAT_LEFT,

	1126 ADDTAGS_AFTER_CAT_RIGHT,

	1127 ADDTAGS_SET_SUBMATCH_END

	1128 } tre_addtags_symbol_t;

	1129

	1130

	1131 typedef struct {

	1132 int tag;

	1133 int next_tag;

	1134 } tre_tag_states_t;

	1135

	1136

	1137 /* Go through `regset' and set submatch data for submatches that are

	1138 using this tag. */

	1139 static void

	1140 tre_purge_regset(int regset, tre_tnfa_t tnfa, int tag)

	1141 {

	1142 int i;

	1143

	1144 for (i = 0; regset[i] >= 0; i++)

	1145 {

	1146 int id = regset[i] / 2;

	1147 int start = !(regset[i] % 2);

	1148 if (start)

	1149 tnfa->submatch_data[id].so_tag = tag;

	1150 else

	1151 tnfa->submatch_data[id].eo_tag = tag;

	1152 }

	1153 regset[0] = -1;

	1154 }

	1155

	1156

	1157 /* Adds tags to appropriate locations in the parse tree in `tree', so that

	1158 subexpressions marked for submatch addressing can be traced. */

	1159 static reg_errcode_t

	1160 tre_add_tags(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree,

	1161 tre_tnfa_t *tnfa)

	1162 {

	1163 reg_errcode_t status = REG_OK;

	1164 tre_addtags_symbol_t symbol;

	1165 tre_ast_node_t node = tree; / Tree node we are currently looking at. */

	1166 int bottom = tre_stack_num_objects(stack);

	1167 /* True for first pass (counting number of needed tags) */

	1168 int first_pass = (mem == NULL \|\| tnfa == NULL);

	1169 int regset, orig_regset;

	1170 int num_tags = 0; /* Total number of tags. */

	1171 int num_minimals = 0; /* Number of special minimal tags. */

	1172 int tag = 0; /* The tag that is to be added next. */

	1173 int next_tag = 1; /* Next tag to use after this one. */

	1174 int parents; / Stack of submatches the current submatch is

	1175 contained in. */

	1176 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */

	1177 tre_tag_states_t *saved_states;

	1178

	1179 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;

	1180 if (!first_pass)

	1181 {

	1182 tnfa->end_tag = 0;

	1183 tnfa->minimal_tags[0] = -1;

	1184 }

	1185

	1186 regset = xmalloc(sizeof(regset) ((tnfa->num_submatches + 1) * 2));

	1187 if (regset == NULL)

	1188 return REG_ESPACE;

	1189 regset[0] = -1;

	1190 orig_regset = regset;

	1191

	1192 parents = xmalloc(sizeof(parents) (tnfa->num_submatches + 1));

	1193 if (parents == NULL)

	1194 {

	1195 xfree(regset);

	1196 return REG_ESPACE;

	1197 }

	1198 parents[0] = -1;

	1199

	1200 saved_states = xmalloc(sizeof(saved_states) (tnfa->num_submatches + 1));

	1201 if (saved_states == NULL)

	1202 {

	1203 xfree(regset);

	1204 xfree(parents);

	1205 return REG_ESPACE;

	1206 }

	1207 else

	1208 {

	1209 unsigned int i;

	1210 for (i = 0; i <= tnfa->num_submatches; i++)

	1211 saved_states[i].tag = -1;

	1212 }

	1213

	1214 STACK_PUSH(stack, voidptr, node);

	1215 STACK_PUSH(stack, int, ADDTAGS_RECURSE);

	1216

	1217 while (tre_stack_num_objects(stack) > bottom)

	1218 {

	1219 if (status != REG_OK)

	1220 break;

	1221

	1222 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);

	1223 switch (symbol)

	1224 {

	1225

	1226 case ADDTAGS_SET_SUBMATCH_END:

	1227 {

	1228 int id = tre_stack_pop_int(stack);

	1229 int i;

	1230

	1231 /* Add end of this submatch to regset. */

	1232 for (i = 0; regset[i] >= 0; i++);

	1233 regset[i] = id * 2 + 1;

	1234 regset[i + 1] = -1;

	1235

	1236 /* Pop this submatch from the parents stack. */

	1237 for (i = 0; parents[i] >= 0; i++);

	1238 parents[i - 1] = -1;

	1239 break;

	1240 }

	1241

	1242 case ADDTAGS_RECURSE:

	1243 node = tre_stack_pop_voidptr(stack);

	1244

	1245 if (node->submatch_id >= 0)

	1246 {

	1247 int id = node->submatch_id;

	1248 int i;

	1249

	1250

	1251 /* Add start of this submatch to regset. */

	1252 for (i = 0; regset[i] >= 0; i++);

	1253 regset[i] = id * 2;

	1254 regset[i + 1] = -1;

	1255

	1256 if (!first_pass)

	1257 {

	1258 for (i = 0; parents[i] >= 0; i++);

	1259 tnfa->submatch_data[id].parents = NULL;

	1260 if (i > 0)

	1261 {

	1262 int p = xmalloc(sizeof(p) * (i + 1));

	1263 if (p == NULL)

	1264 {

	1265 status = REG_ESPACE;

	1266 break;

	1267 }

	1268 assert(tnfa->submatch_data[id].parents == NULL);

	1269 tnfa->submatch_data[id].parents = p;

	1270 for (i = 0; parents[i] >= 0; i++)

	1271 p[i] = parents[i];

	1272 p[i] = -1;

	1273 }

	1274 }

	1275

	1276 /* Add end of this submatch to regset after processing this

	1277 node. */

	1278 STACK_PUSHX(stack, int, node->submatch_id);

	1279 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);

	1280 }

	1281

	1282 switch (node->type)

	1283 {

	1284 case LITERAL:

	1285 {

	1286 tre_literal_t *lit = node->obj;

	1287

	1288 if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit))

	1289 {

	1290 int i;

	1291 if (regset[0] >= 0)

	1292 {

	1293 /* Regset is not empty, so add a tag before the

	1294 literal or backref. */

	1295 if (!first_pass)

	1296 {

	1297 status = tre_add_tag_left(mem, node, tag);

	1298 tnfa->tag_directions[tag] = direction;

	1299 if (minimal_tag >= 0)

	1300 {

	1301 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);

	1302 tnfa->minimal_tags[i] = tag;

	1303 tnfa->minimal_tags[i + 1] = minimal_tag;

	1304 tnfa->minimal_tags[i + 2] = -1;

	1305 minimal_tag = -1;

	1306 num_minimals++;

	1307 }

	1308 tre_purge_regset(regset, tnfa, tag);

	1309 }

	1310 else

	1311 {

	1312 node->num_tags = 1;

	1313 }

	1314

	1315 regset[0] = -1;

	1316 tag = next_tag;

	1317 num_tags++;

	1318 next_tag++;

	1319 }

	1320 }

	1321 else

	1322 {

	1323 assert(!IS_TAG(lit));

	1324 }

	1325 break;

	1326 }

	1327 case CATENATION:

	1328 {

	1329 tre_catenation_t *cat = node->obj;

	1330 tre_ast_node_t *left = cat->left;

	1331 tre_ast_node_t *right = cat->right;

	1332 int reserved_tag = -1;

	1333

	1334

	1335 /* After processing right child. */

	1336 STACK_PUSHX(stack, voidptr, node);

	1337 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);

	1338

	1339 /* Process right child. */

	1340 STACK_PUSHX(stack, voidptr, right);

	1341 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

	1342

	1343 /* After processing left child. */

	1344 STACK_PUSHX(stack, int, next_tag + left->num_tags);

	1345 if (left->num_tags > 0 && right->num_tags > 0)

	1346 {

	1347 /* Reserve the next tag to the right child. */

	1348 reserved_tag = next_tag;

	1349 next_tag++;

	1350 }

	1351 STACK_PUSHX(stack, int, reserved_tag);

	1352 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);

	1353

	1354 /* Process left child. */

	1355 STACK_PUSHX(stack, voidptr, left);

	1356 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

	1357

	1358 }

	1359 break;

	1360 case ITERATION:

	1361 {

	1362 tre_iteration_t *iter = node->obj;

	1363

	1364 if (first_pass)

	1365 {

	1366 STACK_PUSHX(stack, int, regset[0] >= 0 \|\| iter->minimal);

	1367 }

	1368 else

	1369 {

	1370 STACK_PUSHX(stack, int, tag);

	1371 STACK_PUSHX(stack, int, iter->minimal);

	1372 }

	1373 STACK_PUSHX(stack, voidptr, node);

	1374 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);

	1375

	1376 STACK_PUSHX(stack, voidptr, iter->arg);

	1377 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

	1378

	1379 /* Regset is not empty, so add a tag here. */

	1380 if (regset[0] >= 0 \|\| iter->minimal)

	1381 {

	1382 if (!first_pass)

	1383 {

	1384 int i;

	1385 status = tre_add_tag_left(mem, node, tag);

	1386 if (iter->minimal)

	1387 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;

	1388 else

	1389 tnfa->tag_directions[tag] = direction;

	1390 if (minimal_tag >= 0)

	1391 {

	1392 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);

	1393 tnfa->minimal_tags[i] = tag;

	1394 tnfa->minimal_tags[i + 1] = minimal_tag;

	1395 tnfa->minimal_tags[i + 2] = -1;

	1396 minimal_tag = -1;

	1397 num_minimals++;

	1398 }

	1399 tre_purge_regset(regset, tnfa, tag);

	1400 }

	1401

	1402 regset[0] = -1;

	1403 tag = next_tag;

	1404 num_tags++;

	1405 next_tag++;

	1406 }

	1407 direction = TRE_TAG_MINIMIZE;

	1408 }

	1409 break;

	1410 case UNION:

	1411 {

	1412 tre_union_t *uni = node->obj;

	1413 tre_ast_node_t *left = uni->left;

	1414 tre_ast_node_t *right = uni->right;

	1415 int left_tag;

	1416 int right_tag;

	1417

	1418 if (regset[0] >= 0)

	1419 {

	1420 left_tag = next_tag;

	1421 right_tag = next_tag + 1;

	1422 }

	1423 else

	1424 {

	1425 left_tag = tag;

	1426 right_tag = next_tag;

	1427 }

	1428

	1429 /* After processing right child. */

	1430 STACK_PUSHX(stack, int, right_tag);

	1431 STACK_PUSHX(stack, int, left_tag);

	1432 STACK_PUSHX(stack, voidptr, regset);

	1433 STACK_PUSHX(stack, int, regset[0] >= 0);

	1434 STACK_PUSHX(stack, voidptr, node);

	1435 STACK_PUSHX(stack, voidptr, right);

	1436 STACK_PUSHX(stack, voidptr, left);

	1437 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);

	1438

	1439 /* Process right child. */

	1440 STACK_PUSHX(stack, voidptr, right);

	1441 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

	1442

	1443 /* After processing left child. */

	1444 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);

	1445

	1446 /* Process left child. */

	1447 STACK_PUSHX(stack, voidptr, left);

	1448 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

	1449

	1450 /* Regset is not empty, so add a tag here. */

	1451 if (regset[0] >= 0)

	1452 {

	1453 if (!first_pass)

	1454 {

	1455 int i;

	1456 status = tre_add_tag_left(mem, node, tag);

	1457 tnfa->tag_directions[tag] = direction;

	1458 if (minimal_tag >= 0)

	1459 {

	1460 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);

	1461 tnfa->minimal_tags[i] = tag;

	1462 tnfa->minimal_tags[i + 1] = minimal_tag;

	1463 tnfa->minimal_tags[i + 2] = -1;

	1464 minimal_tag = -1;

	1465 num_minimals++;

	1466 }

	1467 tre_purge_regset(regset, tnfa, tag);

	1468 }

	1469

	1470 regset[0] = -1;

	1471 tag = next_tag;

	1472 num_tags++;

	1473 next_tag++;

	1474 }

	1475

	1476 if (node->num_submatches > 0)

	1477 {

	1478 /* The next two tags are reserved for markers. */

	1479 next_tag++;

	1480 tag = next_tag;

	1481 next_tag++;

	1482 }

	1483

	1484 break;

	1485 }

	1486 }

	1487

	1488 if (node->submatch_id >= 0)

	1489 {

	1490 int i;

	1491 /* Push this submatch on the parents stack. */

	1492 for (i = 0; parents[i] >= 0; i++);

	1493 parents[i] = node->submatch_id;

	1494 parents[i + 1] = -1;

	1495 }

	1496

	1497 break; /* end case: ADDTAGS_RECURSE */

	1498

	1499 case ADDTAGS_AFTER_ITERATION:

	1500 {

	1501 int minimal = 0;

	1502 int enter_tag;

	1503 node = tre_stack_pop_voidptr(stack);

	1504 if (first_pass)

	1505 {

	1506 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags

	1507 + tre_stack_pop_int(stack);

	1508 minimal_tag = -1;

	1509 }

	1510 else

	1511 {

	1512 minimal = tre_stack_pop_int(stack);

	1513 enter_tag = tre_stack_pop_int(stack);

	1514 if (minimal)

	1515 minimal_tag = enter_tag;

	1516 }

	1517

	1518 if (!first_pass)

	1519 {

	1520 if (minimal)

	1521 direction = TRE_TAG_MINIMIZE;

	1522 else

	1523 direction = TRE_TAG_MAXIMIZE;

	1524 }

	1525 break;

	1526 }

	1527

	1528 case ADDTAGS_AFTER_CAT_LEFT:

	1529 {

	1530 int new_tag = tre_stack_pop_int(stack);

	1531 next_tag = tre_stack_pop_int(stack);

	1532 if (new_tag >= 0)

	1533 {

	1534 tag = new_tag;

	1535 }

	1536 break;

	1537 }

	1538

	1539 case ADDTAGS_AFTER_CAT_RIGHT:

	1540 node = tre_stack_pop_voidptr(stack);

	1541 if (first_pass)

	1542 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags

	1543 + ((tre_catenation_t *)node->obj)->right->num_tags;

	1544 break;

	1545

	1546 case ADDTAGS_AFTER_UNION_LEFT:

	1547 /* Lift the bottom of the `regset' array so that when processing

	1548 the right operand the items currently in the array are

	1549 invisible. The original bottom was saved at ADDTAGS_UNION and

	1550 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */

	1551 while (*regset >= 0)

	1552 regset++;

	1553 break;

	1554

	1555 case ADDTAGS_AFTER_UNION_RIGHT:

	1556 {

	1557 int added_tags, tag_left, tag_right;

	1558 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);

	1559 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);

	1560 node = tre_stack_pop_voidptr(stack);

	1561 added_tags = tre_stack_pop_int(stack);

	1562 if (first_pass)

	1563 {

	1564 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags

	1565 + ((tre_union_t *)node->obj)->right->num_tags + added_tags

	1566 + ((node->num_submatches > 0) ? 2 : 0);

	1567 }

	1568 regset = tre_stack_pop_voidptr(stack);

	1569 tag_left = tre_stack_pop_int(stack);

	1570 tag_right = tre_stack_pop_int(stack);

	1571

	1572 /* Add tags after both children, the left child gets a smaller

	1573 tag than the right child. This guarantees that we prefer

	1574 the left child over the right child. */

	1575 /* XXX - This is not always necessary (if the children have

	1576 tags which must be seen for every match of that child). */

	1577 /* XXX - Check if this is the only place where tre_add_tag_right

	1578 is used. If so, use tre_add_tag_left (putting the tag before

	1579 the child as opposed after the child) and throw away

	1580 tre_add_tag_right. */

	1581 if (node->num_submatches > 0)

	1582 {

	1583 if (!first_pass)

	1584 {

	1585 status = tre_add_tag_right(mem, left, tag_left);

	1586 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;

	1587 if (status == REG_OK)

	1588 status = tre_add_tag_right(mem, right, tag_right);

	1589 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;

	1590 }

	1591 num_tags += 2;

	1592 }

	1593 direction = TRE_TAG_MAXIMIZE;

	1594 break;

	1595 }

	1596

	1597 default:

	1598 assert(0);

	1599 break;

	1600

	1601 } /* end switch(symbol) */

	1602 } /* end while(tre_stack_num_objects(stack) > bottom) */

	1603

	1604 if (!first_pass)

	1605 tre_purge_regset(regset, tnfa, tag);

	1606

	1607 if (!first_pass && minimal_tag >= 0)

	1608 {

	1609 int i;

	1610 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);

	1611 tnfa->minimal_tags[i] = tag;

	1612 tnfa->minimal_tags[i + 1] = minimal_tag;

	1613 tnfa->minimal_tags[i + 2] = -1;

	1614 minimal_tag = -1;

	1615 num_minimals++;

	1616 }

	1617

	1618 assert(tree->num_tags == num_tags);

	1619 tnfa->end_tag = num_tags;

	1620 tnfa->num_tags = num_tags;

	1621 tnfa->num_minimals = num_minimals;

	1622 xfree(orig_regset);

	1623 xfree(parents);

	1624 xfree(saved_states);

	1625 return status;

	1626 }

	1627

	1628

	1629

	1630 /*

	1631 AST to TNFA compilation routines.

	1632 */

	1633

	1634 typedef enum {

	1635 COPY_RECURSE,

	1636 COPY_SET_RESULT_PTR

	1637 } tre_copyast_symbol_t;

	1638

	1639 /* Flags for tre_copy_ast(). */

	1640 #define COPY_REMOVE_TAGS 1

	1641 #define COPY_MAXIMIZE_FIRST_TAG 2

	1642

	1643 static reg_errcode_t

	1644 tre_copy_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,

	1645 int flags, int pos_add, tre_tag_direction_t tag_directions,

	1646 tre_ast_node_t *copy, int max_pos)

	1647 {

	1648 reg_errcode_t status = REG_OK;

	1649 int bottom = tre_stack_num_objects(stack);

	1650 int num_copied = 0;

	1651 int first_tag = 1;

	1652 tre_ast_node_t **result = copy;

	1653 tre_copyast_symbol_t symbol;

	1654

	1655 STACK_PUSH(stack, voidptr, ast);

	1656 STACK_PUSH(stack, int, COPY_RECURSE);

	1657

	1658 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)

	1659 {

	1660 tre_ast_node_t *node;

	1661 if (status != REG_OK)

	1662 break;

	1663

	1664 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);

	1665 switch (symbol)

	1666 {

	1667 case COPY_SET_RESULT_PTR:

	1668 result = tre_stack_pop_voidptr(stack);

	1669 break;

	1670 case COPY_RECURSE:

	1671 node = tre_stack_pop_voidptr(stack);

	1672 switch (node->type)

	1673 {

	1674 case LITERAL:

	1675 {

	1676 tre_literal_t *lit = node->obj;

	1677 int pos = lit->position;

	1678 int min = lit->code_min;

	1679 int max = lit->code_max;

	1680 if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit))

	1681 {

	1682 /* XXX - e.g. [ab] has only one position but two

	1683 nodes, so we are creating holes in the state space

	1684 here. Not fatal, just wastes memory. */

	1685 pos += *pos_add;

	1686 num_copied++;

	1687 }

	1688 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))

	1689 {

	1690 /* Change this tag to empty. */

	1691 min = EMPTY;

	1692 max = pos = -1;

	1693 }

	1694 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)

	1695 && first_tag)

	1696 {

	1697 /* Maximize the first tag. */

	1698 tag_directions[max] = TRE_TAG_MAXIMIZE;

	1699 first_tag = 0;

	1700 }

	1701 *result = tre_ast_new_literal(mem, min, max, pos);

	1702 if (*result == NULL)

	1703 status = REG_ESPACE;

	1704 else {

	1705 tre_literal_t p = (result)->obj;

	1706 p->class = lit->class;

	1707 p->neg_classes = lit->neg_classes;

	1708 }

	1709

	1710 if (pos > *max_pos)

	1711 *max_pos = pos;

	1712 break;

	1713 }

	1714 case UNION:

	1715 {

	1716 tre_union_t *uni = node->obj;

	1717 tre_union_t *tmp;

	1718 *result = tre_ast_new_union(mem, uni->left, uni->right);

	1719 if (*result == NULL)

	1720 {

	1721 status = REG_ESPACE;

	1722 break;

	1723 }

	1724 tmp = (*result)->obj;

	1725 result = &tmp->left;

	1726 STACK_PUSHX(stack, voidptr, uni->right);

	1727 STACK_PUSHX(stack, int, COPY_RECURSE);

	1728 STACK_PUSHX(stack, voidptr, &tmp->right);

	1729 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);

	1730 STACK_PUSHX(stack, voidptr, uni->left);

	1731 STACK_PUSHX(stack, int, COPY_RECURSE);

	1732 break;

	1733 }

	1734 case CATENATION:

	1735 {

	1736 tre_catenation_t *cat = node->obj;

	1737 tre_catenation_t *tmp;

	1738 *result = tre_ast_new_catenation(mem, cat->left, cat->right);

	1739 if (*result == NULL)

	1740 {

	1741 status = REG_ESPACE;

	1742 break;

	1743 }

	1744 tmp = (*result)->obj;

	1745 tmp->left = NULL;

	1746 tmp->right = NULL;

	1747 result = &tmp->left;

	1748

	1749 STACK_PUSHX(stack, voidptr, cat->right);

	1750 STACK_PUSHX(stack, int, COPY_RECURSE);

	1751 STACK_PUSHX(stack, voidptr, &tmp->right);

	1752 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);

	1753 STACK_PUSHX(stack, voidptr, cat->left);

	1754 STACK_PUSHX(stack, int, COPY_RECURSE);

	1755 break;

	1756 }

	1757 case ITERATION:

	1758 {

	1759 tre_iteration_t *iter = node->obj;

	1760 STACK_PUSHX(stack, voidptr, iter->arg);

	1761 STACK_PUSHX(stack, int, COPY_RECURSE);

	1762 *result = tre_ast_new_iter(mem, iter->arg, iter->min,

	1763 iter->max, iter->minimal);

	1764 if (*result == NULL)

	1765 {

	1766 status = REG_ESPACE;

	1767 break;

	1768 }

	1769 iter = (*result)->obj;

	1770 result = &iter->arg;

	1771 break;

	1772 }

	1773 default:

	1774 assert(0);

	1775 break;

	1776 }

	1777 break;

	1778 }

	1779 }

	1780 *pos_add += num_copied;

	1781 return status;

	1782 }

	1783

	1784 typedef enum {

	1785 EXPAND_RECURSE,

	1786 EXPAND_AFTER_ITER

	1787 } tre_expand_ast_symbol_t;

	1788

	1789 /* Expands each iteration node that has a finite nonzero minimum or maximum

	1790 iteration count to a catenated sequence of copies of the node. */

	1791 static reg_errcode_t

	1792 tre_expand_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,

	1793 int position, tre_tag_direction_t tag_directions)

	1794 {

	1795 reg_errcode_t status = REG_OK;

	1796 int bottom = tre_stack_num_objects(stack);

	1797 int pos_add = 0;

	1798 int pos_add_total = 0;

	1799 int max_pos = 0;

	1800 int iter_depth = 0;

	1801

	1802 STACK_PUSHR(stack, voidptr, ast);

	1803 STACK_PUSHR(stack, int, EXPAND_RECURSE);

	1804 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)

	1805 {

	1806 tre_ast_node_t *node;

	1807 tre_expand_ast_symbol_t symbol;

	1808

	1809 if (status != REG_OK)

	1810 break;

	1811

	1812 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);

	1813 node = tre_stack_pop_voidptr(stack);

	1814 switch (symbol)

	1815 {

	1816 case EXPAND_RECURSE:

	1817 switch (node->type)

	1818 {

	1819 case LITERAL:

	1820 {

	1821 tre_literal_t *lit= node->obj;

	1822 if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit))

	1823 {

	1824 lit->position += pos_add;

	1825 if (lit->position > max_pos)

	1826 max_pos = lit->position;

	1827 }

	1828 break;

	1829 }

	1830 case UNION:

	1831 {

	1832 tre_union_t *uni = node->obj;

	1833 STACK_PUSHX(stack, voidptr, uni->right);

	1834 STACK_PUSHX(stack, int, EXPAND_RECURSE);

	1835 STACK_PUSHX(stack, voidptr, uni->left);

	1836 STACK_PUSHX(stack, int, EXPAND_RECURSE);

	1837 break;

	1838 }

	1839 case CATENATION:

	1840 {

	1841 tre_catenation_t *cat = node->obj;

	1842 STACK_PUSHX(stack, voidptr, cat->right);

	1843 STACK_PUSHX(stack, int, EXPAND_RECURSE);

	1844 STACK_PUSHX(stack, voidptr, cat->left);

	1845 STACK_PUSHX(stack, int, EXPAND_RECURSE);

	1846 break;

	1847 }

	1848 case ITERATION:

	1849 {

	1850 tre_iteration_t *iter = node->obj;

	1851 STACK_PUSHX(stack, int, pos_add);

	1852 STACK_PUSHX(stack, voidptr, node);

	1853 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);

	1854 STACK_PUSHX(stack, voidptr, iter->arg);

	1855 STACK_PUSHX(stack, int, EXPAND_RECURSE);

	1856 /* If we are going to expand this node at EXPAND_AFTER_ITER

	1857 then don't increase the `pos' fields of the nodes now, it

	1858 will get done when expanding. */

	1859 if (iter->min > 1 \|\| iter->max > 1)

	1860 pos_add = 0;

	1861 iter_depth++;

	1862 break;

	1863 }

	1864 default:

	1865 assert(0);

	1866 break;

	1867 }

	1868 break;

	1869 case EXPAND_AFTER_ITER:

	1870 {

	1871 tre_iteration_t *iter = node->obj;

	1872 int pos_add_last;

	1873 pos_add = tre_stack_pop_int(stack);

	1874 pos_add_last = pos_add;

	1875 if (iter->min > 1 \|\| iter->max > 1)

	1876 {

	1877 tre_ast_node_t seq1 = NULL, seq2 = NULL;

	1878 int j;

	1879 int pos_add_save = pos_add;

	1880

	1881 /* Create a catenated sequence of copies of the node. */

	1882 for (j = 0; j < iter->min; j++)

	1883 {

	1884 tre_ast_node_t *copy;

	1885 /* Remove tags from all but the last copy. */

	1886 int flags = ((j + 1 < iter->min)

	1887 ? COPY_REMOVE_TAGS

	1888 : COPY_MAXIMIZE_FIRST_TAG);

	1889 pos_add_save = pos_add;

	1890 status = tre_copy_ast(mem, stack, iter->arg, flags,

	1891 &pos_add, tag_directions, &copy,

	1892 &max_pos);

	1893 if (status != REG_OK)

	1894 return status;

	1895 if (seq1 != NULL)

	1896 seq1 = tre_ast_new_catenation(mem, seq1, copy);

	1897 else

	1898 seq1 = copy;

	1899 if (seq1 == NULL)

	1900 return REG_ESPACE;

	1901 }

	1902

	1903 if (iter->max == -1)

	1904 {

	1905 /* No upper limit. */

	1906 pos_add_save = pos_add;

	1907 status = tre_copy_ast(mem, stack, iter->arg, 0,

	1908 &pos_add, NULL, &seq2, &max_pos);

	1909 if (status != REG_OK)

	1910 return status;

	1911 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);

	1912 if (seq2 == NULL)

	1913 return REG_ESPACE;

	1914 }

	1915 else

	1916 {

	1917 for (j = iter->min; j < iter->max; j++)

	1918 {

	1919 tre_ast_node_t tmp, copy;

	1920 pos_add_save = pos_add;

	1921 status = tre_copy_ast(mem, stack, iter->arg, 0,

	1922 &pos_add, NULL, &copy, &max_pos);

	1923 if (status != REG_OK)

	1924 return status;

	1925 if (seq2 != NULL)

	1926 seq2 = tre_ast_new_catenation(mem, copy, seq2);

	1927 else

	1928 seq2 = copy;

	1929 if (seq2 == NULL)

	1930 return REG_ESPACE;

	1931 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);

	1932 if (tmp == NULL)

	1933 return REG_ESPACE;

	1934 seq2 = tre_ast_new_union(mem, tmp, seq2);

	1935 if (seq2 == NULL)

	1936 return REG_ESPACE;

	1937 }

	1938 }

	1939

	1940 pos_add = pos_add_save;

	1941 if (seq1 == NULL)

	1942 seq1 = seq2;

	1943 else if (seq2 != NULL)

	1944 seq1 = tre_ast_new_catenation(mem, seq1, seq2);

	1945 if (seq1 == NULL)

	1946 return REG_ESPACE;

	1947 node->obj = seq1->obj;

	1948 node->type = seq1->type;

	1949 }

	1950

	1951 iter_depth--;

	1952 pos_add_total += pos_add - pos_add_last;

	1953 if (iter_depth == 0)

	1954 pos_add = pos_add_total;

	1955

	1956 break;

	1957 }

	1958 default:

	1959 assert(0);

	1960 break;

	1961 }

	1962 }

	1963

	1964 *position += pos_add_total;

	1965

	1966 /* `max_pos' should never be larger than `*position' if the above

	1967 code works, but just an extra safeguard let's make sure

	1968 `*position' is set large enough so enough memory will be

	1969 allocated for the transition table. */

	1970 if (max_pos > *position)

	1971 *position = max_pos;

	1972

	1973 return status;

	1974 }

	1975

	1976 static tre_pos_and_tags_t *

	1977 tre_set_empty(tre_mem_t mem)

	1978 {

	1979 tre_pos_and_tags_t *new_set;

	1980

	1981 new_set = tre_mem_calloc(mem, sizeof(*new_set));

	1982 if (new_set == NULL)

	1983 return NULL;

	1984

	1985 new_set[0].position = -1;

	1986 new_set[0].code_min = -1;

	1987 new_set[0].code_max = -1;

	1988

	1989 return new_set;

	1990 }

	1991

	1992 static tre_pos_and_tags_t *

	1993 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,

	1994 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)

	1995 {

	1996 tre_pos_and_tags_t *new_set;

	1997

	1998 new_set = tre_mem_calloc(mem, sizeof(new_set) 2);

	1999 if (new_set == NULL)

	2000 return NULL;

	2001

	2002 new_set[0].position = position;

	2003 new_set[0].code_min = code_min;

	2004 new_set[0].code_max = code_max;

	2005 new_set[0].class = class;

	2006 new_set[0].neg_classes = neg_classes;

	2007 new_set[0].backref = backref;

	2008 new_set[1].position = -1;

	2009 new_set[1].code_min = -1;

	2010 new_set[1].code_max = -1;

	2011

	2012 return new_set;

	2013 }

	2014

	2015 static tre_pos_and_tags_t *

	2016 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t set1, tre_pos_and_tags_t set2,

	2017 int *tags, int assertions)

	2018 {

	2019 int s1, s2, i, j;

	2020 tre_pos_and_tags_t *new_set;

	2021 int *new_tags;

	2022 int num_tags;

	2023

	2024 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);

	2025 for (s1 = 0; set1[s1].position >= 0; s1++);

	2026 for (s2 = 0; set2[s2].position >= 0; s2++);

	2027 new_set = tre_mem_calloc(mem, sizeof(new_set) (s1 + s2 + 1));

	2028 if (!new_set )

	2029 return NULL;

	2030

	2031 for (s1 = 0; set1[s1].position >= 0; s1++)

	2032 {

	2033 new_set[s1].position = set1[s1].position;

	2034 new_set[s1].code_min = set1[s1].code_min;

	2035 new_set[s1].code_max = set1[s1].code_max;

	2036 new_set[s1].assertions = set1[s1].assertions \| assertions;

	2037 new_set[s1].class = set1[s1].class;

	2038 new_set[s1].neg_classes = set1[s1].neg_classes;

	2039 new_set[s1].backref = set1[s1].backref;

	2040 if (set1[s1].tags == NULL && tags == NULL)

	2041 new_set[s1].tags = NULL;

	2042 else

	2043 {

	2044 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);

	2045 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)

	2046 * (i + num_tags + 1)));

	2047 if (new_tags == NULL)

	2048 return NULL;

	2049 for (j = 0; j < i; j++)

	2050 new_tags[j] = set1[s1].tags[j];

	2051 for (i = 0; i < num_tags; i++)

	2052 new_tags[j + i] = tags[i];

	2053 new_tags[j + i] = -1;

	2054 new_set[s1].tags = new_tags;

	2055 }

	2056 }

	2057

	2058 for (s2 = 0; set2[s2].position >= 0; s2++)

	2059 {

	2060 new_set[s1 + s2].position = set2[s2].position;

	2061 new_set[s1 + s2].code_min = set2[s2].code_min;

	2062 new_set[s1 + s2].code_max = set2[s2].code_max;

	2063 /* XXX - why not \| assertions here as well? */

	2064 new_set[s1 + s2].assertions = set2[s2].assertions;

	2065 new_set[s1 + s2].class = set2[s2].class;

	2066 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;

	2067 new_set[s1 + s2].backref = set2[s2].backref;

	2068 if (set2[s2].tags == NULL)

	2069 new_set[s1 + s2].tags = NULL;

	2070 else

	2071 {

	2072 for (i = 0; set2[s2].tags[i] >= 0; i++);

	2073 new_tags = tre_mem_alloc(mem, sizeof(new_tags) (i + 1));

	2074 if (new_tags == NULL)

	2075 return NULL;

	2076 for (j = 0; j < i; j++)

	2077 new_tags[j] = set2[s2].tags[j];

	2078 new_tags[j] = -1;

	2079 new_set[s1 + s2].tags = new_tags;

	2080 }

	2081 }

	2082 new_set[s1 + s2].position = -1;

	2083 return new_set;

	2084 }

	2085

	2086 /* Finds the empty path through `node' which is the one that should be

	2087 taken according to POSIX.2 rules, and adds the tags on that path to

	2088 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is

	2089 set to the number of tags seen on the path. */

	2090 static reg_errcode_t

	2091 tre_match_empty(tre_stack_t stack, tre_ast_node_t node, int *tags,

	2092 int assertions, int num_tags_seen)

	2093 {

	2094 tre_literal_t *lit;

	2095 tre_union_t *uni;

	2096 tre_catenation_t *cat;

	2097 tre_iteration_t *iter;

	2098 int i;

	2099 int bottom = tre_stack_num_objects(stack);

	2100 reg_errcode_t status = REG_OK;

	2101 if (num_tags_seen)

	2102 *num_tags_seen = 0;

	2103

	2104 status = tre_stack_push_voidptr(stack, node);

	2105

	2106 /* Walk through the tree recursively. */

	2107 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)

	2108 {

	2109 node = tre_stack_pop_voidptr(stack);

	2110

	2111 switch (node->type)

	2112 {

	2113 case LITERAL:

	2114 lit = (tre_literal_t *)node->obj;

	2115 switch (lit->code_min)

	2116 {

	2117 case TAG:

	2118 if (lit->code_max >= 0)

	2119 {

	2120 if (tags != NULL)

	2121 {

	2122 /* Add the tag to `tags'. */

	2123 for (i = 0; tags[i] >= 0; i++)

	2124 if (tags[i] == lit->code_max)

	2125 break;

	2126 if (tags[i] < 0)

	2127 {

	2128 tags[i] = lit->code_max;

	2129 tags[i + 1] = -1;

	2130 }

	2131 }

	2132 if (num_tags_seen)

	2133 (*num_tags_seen)++;

	2134 }

	2135 break;

	2136 case ASSERTION:

	2137 assert(lit->code_max >= 1

	2138 \|\| lit->code_max <= ASSERT_LAST);

	2139 if (assertions != NULL)

	2140 *assertions \|= lit->code_max;

	2141 break;

	2142 case EMPTY:

	2143 break;

	2144 default:

	2145 assert(0);

	2146 break;

	2147 }

	2148 break;

	2149

	2150 case UNION:

	2151 /* Subexpressions starting earlier take priority over ones

	2152 starting later, so we prefer the left subexpression over the

	2153 right subexpression. */

	2154 uni = (tre_union_t *)node->obj;

	2155 if (uni->left->nullable)

	2156 STACK_PUSHX(stack, voidptr, uni->left)

	2157 else if (uni->right->nullable)

	2158 STACK_PUSHX(stack, voidptr, uni->right)

	2159 else

	2160 assert(0);

	2161 break;

	2162

	2163 case CATENATION:

	2164 /* The path must go through both children. */

	2165 cat = (tre_catenation_t *)node->obj;

	2166 assert(cat->left->nullable);

	2167 assert(cat->right->nullable);

	2168 STACK_PUSHX(stack, voidptr, cat->left);

	2169 STACK_PUSHX(stack, voidptr, cat->right);

	2170 break;

	2171

	2172 case ITERATION:

	2173 /* A match with an empty string is preferred over no match at

	2174 all, so we go through the argument if possible. */

	2175 iter = (tre_iteration_t *)node->obj;

	2176 if (iter->arg->nullable)

	2177 STACK_PUSHX(stack, voidptr, iter->arg);

	2178 break;

	2179

	2180 default:

	2181 assert(0);

	2182 break;

	2183 }

	2184 }

	2185

	2186 return status;

	2187 }

	2188

	2189

	2190 typedef enum {

	2191 NFL_RECURSE,

	2192 NFL_POST_UNION,

	2193 NFL_POST_CATENATION,

	2194 NFL_POST_ITERATION

	2195 } tre_nfl_stack_symbol_t;

	2196

	2197

	2198 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for

	2199 the nodes of the AST `tree'. */

	2200 static reg_errcode_t

	2201 tre_compute_nfl(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree)

	2202 {

	2203 int bottom = tre_stack_num_objects(stack);

	2204

	2205 STACK_PUSHR(stack, voidptr, tree);

	2206 STACK_PUSHR(stack, int, NFL_RECURSE);

	2207

	2208 while (tre_stack_num_objects(stack) > bottom)

	2209 {

	2210 tre_nfl_stack_symbol_t symbol;

	2211 tre_ast_node_t *node;

	2212

	2213 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);

	2214 node = tre_stack_pop_voidptr(stack);

	2215 switch (symbol)

	2216 {

	2217 case NFL_RECURSE:

	2218 switch (node->type)

	2219 {

	2220 case LITERAL:

	2221 {

	2222 tre_literal_t lit = (tre_literal_t )node->obj;

	2223 if (IS_BACKREF(lit))

	2224 {

	2225 /* Back references: nullable = false, firstpos = {i},

	2226 lastpos = {i}. */

	2227 node->nullable = 0;

	2228 node->firstpos = tre_set_one(mem, lit->position, 0,

	2229 TRE_CHAR_MAX, 0, NULL, -1);

	2230 if (!node->firstpos)

	2231 return REG_ESPACE;

	2232 node->lastpos = tre_set_one(mem, lit->position, 0,

	2233 TRE_CHAR_MAX, 0, NULL,

	2234 (int)lit->code_max);

	2235 if (!node->lastpos)

	2236 return REG_ESPACE;

	2237 }

	2238 else if (lit->code_min < 0)

	2239 {

	2240 /* Tags, empty strings, params, and zero width assertions:

	2241 nullable = true, firstpos = {}, and lastpos = {}. */

	2242 node->nullable = 1;

	2243 node->firstpos = tre_set_empty(mem);

	2244 if (!node->firstpos)

	2245 return REG_ESPACE;

	2246 node->lastpos = tre_set_empty(mem);

	2247 if (!node->lastpos)

	2248 return REG_ESPACE;

	2249 }

	2250 else

	2251 {

	2252 /* Literal at position i: nullable = false, firstpos = {i},

	2253 lastpos = {i}. */

	2254 node->nullable = 0;

	2255 node->firstpos =

	2256 tre_set_one(mem, lit->position, (int)lit->code_min,

	2257 (int)lit->code_max, 0, NULL, -1);

	2258 if (!node->firstpos)

	2259 return REG_ESPACE;

	2260 node->lastpos = tre_set_one(mem, lit->position,

	2261 (int)lit->code_min,

	2262 (int)lit->code_max,

	2263 lit->class, lit->neg_classes,

	2264 -1);

	2265 if (!node->lastpos)

	2266 return REG_ESPACE;

	2267 }

	2268 break;

	2269 }

	2270

	2271 case UNION:

	2272 /* Compute the attributes for the two subtrees, and after that

	2273 for this node. */

	2274 STACK_PUSHR(stack, voidptr, node);

	2275 STACK_PUSHR(stack, int, NFL_POST_UNION);

	2276 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);

	2277 STACK_PUSHR(stack, int, NFL_RECURSE);

	2278 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);

	2279 STACK_PUSHR(stack, int, NFL_RECURSE);

	2280 break;

	2281

	2282 case CATENATION:

	2283 /* Compute the attributes for the two subtrees, and after that

	2284 for this node. */

	2285 STACK_PUSHR(stack, voidptr, node);

	2286 STACK_PUSHR(stack, int, NFL_POST_CATENATION);

	2287 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right );

	2288 STACK_PUSHR(stack, int, NFL_RECURSE);

	2289 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left) ;

	2290 STACK_PUSHR(stack, int, NFL_RECURSE);

	2291 break;

	2292

	2293 case ITERATION:

	2294 /* Compute the attributes for the subtree, and after that for

	2295 this node. */

	2296 STACK_PUSHR(stack, voidptr, node);

	2297 STACK_PUSHR(stack, int, NFL_POST_ITERATION);

	2298 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);

	2299 STACK_PUSHR(stack, int, NFL_RECURSE);

	2300 break;

	2301 }

	2302 break; /* end case: NFL_RECURSE */

	2303

	2304 case NFL_POST_UNION:

	2305 {

	2306 tre_union_t uni = (tre_union_t )node->obj;

	2307 node->nullable = uni->left->nullable \|\| uni->right->nullable;

	2308 node->firstpos = tre_set_union(mem, uni->left->firstpos,

	2309 uni->right->firstpos, NULL, 0);

	2310 if (!node->firstpos)

	2311 return REG_ESPACE;

	2312 node->lastpos = tre_set_union(mem, uni->left->lastpos,

	2313 uni->right->lastpos, NULL, 0);

	2314 if (!node->lastpos)

	2315 return REG_ESPACE;

	2316 break;

	2317 }

	2318

	2319 case NFL_POST_ITERATION:

	2320 {

	2321 tre_iteration_t iter = (tre_iteration_t )node->obj;

	2322

	2323 if (iter->min == 0 \|\| iter->arg->nullable)

	2324 node->nullable = 1;

	2325 else

	2326 node->nullable = 0;

	2327 node->firstpos = iter->arg->firstpos;

	2328 node->lastpos = iter->arg->lastpos;

	2329 break;

	2330 }

	2331

	2332 case NFL_POST_CATENATION:

	2333 {

	2334 int num_tags, *tags, assertions;

	2335 reg_errcode_t status;

	2336 tre_catenation_t *cat = node->obj;

	2337 node->nullable = cat->left->nullable && cat->right->nullable;

	2338

	2339 /* Compute firstpos. */

	2340 if (cat->left->nullable)

	2341 {

	2342 /* The left side matches the empty string. Make a first pass

	2343 with tre_match_empty() to get the number of tags and

	2344 parameters. */

	2345 status = tre_match_empty(stack, cat->left,

	2346 NULL, NULL, &num_tags);

	2347 if (status != REG_OK)

	2348 return status;

	2349 /* Allocate arrays for the tags and parameters. */

	2350 tags = xmalloc(sizeof(tags) (num_tags + 1));

	2351 if (!tags)

	2352 return REG_ESPACE;

	2353 tags[0] = -1;

	2354 assertions = 0;

	2355 /* Second pass with tre_mach_empty() to get the list of

	2356 tags and parameters. */

	2357 status = tre_match_empty(stack, cat->left, tags,

	2358 &assertions, NULL);

	2359 if (status != REG_OK)

	2360 {

	2361 xfree(tags);

	2362 return status;

	2363 }

	2364 node->firstpos =

	2365 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,

	2366 tags, assertions);

	2367 xfree(tags);

	2368 if (!node->firstpos)

	2369 return REG_ESPACE;

	2370 }

	2371 else

	2372 {

	2373 node->firstpos = cat->left->firstpos;

	2374 }

	2375

	2376 /* Compute lastpos. */

	2377 if (cat->right->nullable)

	2378 {

	2379 /* The right side matches the empty string. Make a first pass

	2380 with tre_match_empty() to get the number of tags and

	2381 parameters. */

	2382 status = tre_match_empty(stack, cat->right,

	2383 NULL, NULL, &num_tags);

	2384 if (status != REG_OK)

	2385 return status;

	2386 /* Allocate arrays for the tags and parameters. */

	2387 tags = xmalloc(sizeof(int) * (num_tags + 1));

	2388 if (!tags)

	2389 return REG_ESPACE;

	2390 tags[0] = -1;

	2391 assertions = 0;

	2392 /* Second pass with tre_mach_empty() to get the list of

	2393 tags and parameters. */

	2394 status = tre_match_empty(stack, cat->right, tags,

	2395 &assertions, NULL);

	2396 if (status != REG_OK)

	2397 {

	2398 xfree(tags);

	2399 return status;

	2400 }

	2401 node->lastpos =

	2402 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,

	2403 tags, assertions);

	2404 xfree(tags);

	2405 if (!node->lastpos)

	2406 return REG_ESPACE;

	2407 }

	2408 else

	2409 {

	2410 node->lastpos = cat->right->lastpos;

	2411 }

	2412 break;

	2413 }

	2414

	2415 default:

	2416 assert(0);

	2417 break;

	2418 }

	2419 }

	2420

	2421 return REG_OK;

	2422 }

	2423

	2424

	2425 /* Adds a transition from each position in `p1' to each position in `p2'. */

	2426 static reg_errcode_t

	2427 tre_make_trans(tre_pos_and_tags_t p1, tre_pos_and_tags_t p2,

	2428 tre_tnfa_transition_t *transitions,

	2429 int counts, int offs)

	2430 {

	2431 tre_pos_and_tags_t *orig_p2 = p2;

	2432 tre_tnfa_transition_t *trans;

	2433 int i, j, k, l, dup, prev_p2_pos;

	2434

	2435 if (transitions != NULL)

	2436 while (p1->position >= 0)

	2437 {

	2438 p2 = orig_p2;

	2439 prev_p2_pos = -1;

	2440 while (p2->position >= 0)

	2441 {

	2442 /* Optimization: if this position was already handled, skip it. */

	2443 if (p2->position == prev_p2_pos)

	2444 {

	2445 p2++;

	2446 continue;

	2447 }

	2448 prev_p2_pos = p2->position;

	2449 /* Set `trans' to point to the next unused transition from

	2450 position `p1->position'. */

	2451 trans = transitions + offs[p1->position];

	2452 while (trans->state != NULL)

	2453 {

	2454 #if 0

	2455 /* If we find a previous transition from `p1->position' to

	2456 `p2->position', it is overwritten. This can happen only

	2457 if there are nested loops in the regexp, like in "((a))".

	2458 In POSIX.2 repetition using the outer loop is always

	2459 preferred over using the inner loop. Therefore the

	2460 transition for the inner loop is useless and can be thrown

	2461 away. */

	2462 /* XXX - The same position is used for all nodes in a bracket

	2463 expression, so this optimization cannot be used (it will

	2464 break bracket expressions) unless I figure out a way to

	2465 detect it here. */

	2466 if (trans->state_id == p2->position)

	2467 {

	2468 break;

	2469 }

	2470 #endif

	2471 trans++;

	2472 }

	2473

	2474 if (trans->state == NULL)

	2475 (trans + 1)->state = NULL;

	2476 /* Use the character ranges, assertions, etc. from `p1' for

	2477 the transition from `p1' to `p2'. */

	2478 trans->code_min = p1->code_min;

	2479 trans->code_max = p1->code_max;

	2480 trans->state = transitions + offs[p2->position];

	2481 trans->state_id = p2->position;

	2482 trans->assertions = p1->assertions \| p2->assertions

	2483 \| (p1->class ? ASSERT_CHAR_CLASS : 0)

	2484 \| (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);

	2485 if (p1->backref >= 0)

	2486 {

	2487 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);

	2488 assert(p2->backref < 0);

	2489 trans->u.backref = p1->backref;

	2490 trans->assertions \|= ASSERT_BACKREF;

	2491 }

	2492 else

	2493 trans->u.class = p1->class;

	2494 if (p1->neg_classes != NULL)

	2495 {

	2496 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);

	2497 trans->neg_classes =

	2498 xmalloc(sizeof(trans->neg_classes) (i + 1));

	2499 if (trans->neg_classes == NULL)

	2500 return REG_ESPACE;

	2501 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)

	2502 trans->neg_classes[i] = p1->neg_classes[i];

	2503 trans->neg_classes[i] = (tre_ctype_t)0;

	2504 }

	2505 else

	2506 trans->neg_classes = NULL;

	2507

	2508 /* Find out how many tags this transition has. */

	2509 i = 0;

	2510 if (p1->tags != NULL)

	2511 while(p1->tags[i] >= 0)

	2512 i++;

	2513 j = 0;

	2514 if (p2->tags != NULL)

	2515 while(p2->tags[j] >= 0)

	2516 j++;

	2517

	2518 /* If we are overwriting a transition, free the old tag array. */

	2519 if (trans->tags != NULL)

	2520 xfree(trans->tags);

	2521 trans->tags = NULL;

	2522

	2523 /* If there were any tags, allocate an array and fill it. */

	2524 if (i + j > 0)

	2525 {

	2526 trans->tags = xmalloc(sizeof(trans->tags) (i + j + 1));

	2527 if (!trans->tags)

	2528 return REG_ESPACE;

	2529 i = 0;

	2530 if (p1->tags != NULL)

	2531 while(p1->tags[i] >= 0)

	2532 {

	2533 trans->tags[i] = p1->tags[i];

	2534 i++;

	2535 }

	2536 l = i;

	2537 j = 0;

	2538 if (p2->tags != NULL)

	2539 while (p2->tags[j] >= 0)

	2540 {

	2541 /* Don't add duplicates. */

	2542 dup = 0;

	2543 for (k = 0; k < i; k++)

	2544 if (trans->tags[k] == p2->tags[j])

	2545 {

	2546 dup = 1;

	2547 break;

	2548 }

	2549 if (!dup)

	2550 trans->tags[l++] = p2->tags[j];

	2551 j++;

	2552 }

	2553 trans->tags[l] = -1;

	2554 }

	2555

	2556 p2++;

	2557 }

	2558 p1++;

	2559 }

	2560 else

	2561 /* Compute a maximum limit for the number of transitions leaving

	2562 from each state. */

	2563 while (p1->position >= 0)

	2564 {

	2565 p2 = orig_p2;

	2566 while (p2->position >= 0)

	2567 {

	2568 counts[p1->position]++;

	2569 p2++;

	2570 }

	2571 p1++;

	2572 }

	2573 return REG_OK;

	2574 }

	2575

	2576 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are

	2577 labelled with one character range (there are no transitions on empty

	2578 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of

	2579 the regexp. */

	2580 static reg_errcode_t

	2581 tre_ast_to_tnfa(tre_ast_node_t node, tre_tnfa_transition_t transitions,

	2582 int counts, int offs)

	2583 {

	2584 tre_union_t *uni;

	2585 tre_catenation_t *cat;

	2586 tre_iteration_t *iter;

	2587 reg_errcode_t errcode = REG_OK;

	2588

	2589 /* XXX - recurse using a stack!. */

	2590 switch (node->type)

	2591 {

	2592 case LITERAL:

	2593 break;

	2594 case UNION:

	2595 uni = (tre_union_t *)node->obj;

	2596 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);

	2597 if (errcode != REG_OK)

	2598 return errcode;

	2599 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);

	2600 break;

	2601

	2602 case CATENATION:

	2603 cat = (tre_catenation_t *)node->obj;

	2604 /* Add a transition from each position in cat->left->lastpos

	2605 to each position in cat->right->firstpos. */

	2606 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,

	2607 transitions, counts, offs);

	2608 if (errcode != REG_OK)

	2609 return errcode;

	2610 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);

	2611 if (errcode != REG_OK)

	2612 return errcode;

	2613 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);

	2614 break;

	2615

	2616 case ITERATION:

	2617 iter = (tre_iteration_t *)node->obj;

	2618 assert(iter->max == -1 \|\| iter->max == 1);

	2619

	2620 if (iter->max == -1)

	2621 {

	2622 assert(iter->min == 0 \|\| iter->min == 1);

	2623 /* Add a transition from each last position in the iterated

	2624 expression to each first position. */

	2625 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,

	2626 transitions, counts, offs);

	2627 if (errcode != REG_OK)

	2628 return errcode;

	2629 }

	2630 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);

	2631 break;

	2632 }

	2633 return errcode;

	2634 }

	2635

	2636

	2637 #define ERROR_EXIT(err) \

	2638 do \

	2639 { \

	2640 errcode = err; \

	2641 if (/CONSTCOND/1) \

	2642 goto error_exit; \

	2643 } \

	2644 while (/CONSTCOND/0)

	2645

	2646

	2647 int

	2648 regcomp(regex_t restrict preg, const char restrict regex, int cflags)

	2649 {

	2650 tre_stack_t *stack;

	2651 tre_ast_node_t tree, tmp_ast_l, *tmp_ast_r;

	2652 tre_pos_and_tags_t *p;

	2653 int counts = NULL, offs = NULL;

	2654 int i, add = 0;

	2655 tre_tnfa_transition_t transitions, initial;

	2656 tre_tnfa_t *tnfa = NULL;

	2657 tre_submatch_data_t *submatch_data;

	2658 tre_tag_direction_t *tag_directions = NULL;

	2659 reg_errcode_t errcode;

	2660 tre_mem_t mem;

	2661

	2662 /* Parse context. */

	2663 tre_parse_ctx_t parse_ctx;

	2664

	2665 /* Allocate a stack used throughout the compilation process for various

	2666 purposes. */

	2667 stack = tre_stack_new(512, 10240, 128);

	2668 if (!stack)

	2669 return REG_ESPACE;

	2670 /* Allocate a fast memory allocator. */

	2671 mem = tre_mem_new();

	2672 if (!mem)

	2673 {

	2674 tre_stack_destroy(stack);

	2675 return REG_ESPACE;

	2676 }

	2677

	2678 /* Parse the regexp. */

	2679 memset(&parse_ctx, 0, sizeof(parse_ctx));

	2680 parse_ctx.mem = mem;

	2681 parse_ctx.stack = stack;

	2682 parse_ctx.re = regex;

	2683 parse_ctx.cflags = cflags;

	2684 parse_ctx.max_backref = -1;

	2685 errcode = tre_parse(&parse_ctx);

	2686 if (errcode != REG_OK)

	2687 ERROR_EXIT(errcode);

	2688 preg->re_nsub = parse_ctx.submatch_id - 1;

	2689 tree = parse_ctx.n;

	2690

	2691 #ifdef TRE_DEBUG

	2692 tre_ast_print(tree);

	2693 #endif /* TRE_DEBUG */

	2694

	2695 /* Referring to nonexistent subexpressions is illegal. */

	2696 if (parse_ctx.max_backref > (int)preg->re_nsub)

	2697 ERROR_EXIT(REG_ESUBREG);

	2698

	2699 /* Allocate the TNFA struct. */

	2700 tnfa = xcalloc(1, sizeof(tre_tnfa_t));

	2701 if (tnfa == NULL)

	2702 ERROR_EXIT(REG_ESPACE);

	2703 tnfa->have_backrefs = parse_ctx.max_backref >= 0;

	2704 tnfa->have_approx = 0;

	2705 tnfa->num_submatches = parse_ctx.submatch_id;

	2706

	2707 /* Set up tags for submatch addressing. If REG_NOSUB is set and the

	2708 regexp does not have back references, this can be skipped. */

	2709 if (tnfa->have_backrefs \|\| !(cflags & REG_NOSUB))

	2710 {

	2711

	2712 /* Figure out how many tags we will need. */

	2713 errcode = tre_add_tags(NULL, stack, tree, tnfa);

	2714 if (errcode != REG_OK)

	2715 ERROR_EXIT(errcode);

	2716

	2717 if (tnfa->num_tags > 0)

	2718 {

	2719 tag_directions = xmalloc(sizeof(*tag_directions)

	2720 * (tnfa->num_tags + 1));

	2721 if (tag_directions == NULL)

	2722 ERROR_EXIT(REG_ESPACE);

	2723 tnfa->tag_directions = tag_directions;

	2724 memset(tag_directions, -1,

	2725 sizeof(tag_directions) (tnfa->num_tags + 1));

	2726 }

	2727 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,

	2728 sizeof(*tnfa->minimal_tags));

	2729 if (tnfa->minimal_tags == NULL)

	2730 ERROR_EXIT(REG_ESPACE);

	2731

	2732 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,

	2733 sizeof(*submatch_data));

	2734 if (submatch_data == NULL)

	2735 ERROR_EXIT(REG_ESPACE);

	2736 tnfa->submatch_data = submatch_data;

	2737

	2738 errcode = tre_add_tags(mem, stack, tree, tnfa);

	2739 if (errcode != REG_OK)

	2740 ERROR_EXIT(errcode);

	2741

	2742 }

	2743

	2744 /* Expand iteration nodes. */

	2745 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,

	2746 tag_directions);

	2747 if (errcode != REG_OK)

	2748 ERROR_EXIT(errcode);

	2749

	2750 /* Add a dummy node for the final state.

	2751 XXX - For certain patterns this dummy node can be optimized away,

	2752 for example "a" or "ab". Figure out a simple way to detect

	2753 this possibility. */

	2754 tmp_ast_l = tree;

	2755 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);

	2756 if (tmp_ast_r == NULL)

	2757 ERROR_EXIT(REG_ESPACE);

	2758

	2759 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);

	2760 if (tree == NULL)

	2761 ERROR_EXIT(REG_ESPACE);

	2762

	2763 errcode = tre_compute_nfl(mem, stack, tree);

	2764 if (errcode != REG_OK)

	2765 ERROR_EXIT(errcode);

	2766

	2767 counts = xmalloc(sizeof(int) * parse_ctx.position);

	2768 if (counts == NULL)

	2769 ERROR_EXIT(REG_ESPACE);

	2770

	2771 offs = xmalloc(sizeof(int) * parse_ctx.position);

	2772 if (offs == NULL)

	2773 ERROR_EXIT(REG_ESPACE);

	2774

	2775 for (i = 0; i < parse_ctx.position; i++)

	2776 counts[i] = 0;

	2777 tre_ast_to_tnfa(tree, NULL, counts, NULL);

	2778

	2779 add = 0;

	2780 for (i = 0; i < parse_ctx.position; i++)

	2781 {

	2782 offs[i] = add;

	2783 add += counts[i] + 1;

	2784 counts[i] = 0;

	2785 }

	2786 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));

	2787 if (transitions == NULL)

	2788 ERROR_EXIT(REG_ESPACE);

	2789 tnfa->transitions = transitions;

	2790 tnfa->num_transitions = add;

	2791

	2792 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);

	2793 if (errcode != REG_OK)

	2794 ERROR_EXIT(errcode);

	2795

	2796 tnfa->firstpos_chars = NULL;

	2797

	2798 p = tree->firstpos;

	2799 i = 0;

	2800 while (p->position >= 0)

	2801 {

	2802 i++;

	2803 p++;

	2804 }

	2805

	2806 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));

	2807 if (initial == NULL)

	2808 ERROR_EXIT(REG_ESPACE);

	2809 tnfa->initial = initial;

	2810

	2811 i = 0;

	2812 for (p = tree->firstpos; p->position >= 0; p++)

	2813 {

	2814 initial[i].state = transitions + offs[p->position];

	2815 initial[i].state_id = p->position;

	2816 initial[i].tags = NULL;

	2817 /* Copy the arrays p->tags, and p->params, they are allocated

	2818 from a tre_mem object. */

	2819 if (p->tags)

	2820 {

	2821 int j;

	2822 for (j = 0; p->tags[j] >= 0; j++);

	2823 initial[i].tags = xmalloc(sizeof(p->tags) (j + 1));

	2824 if (!initial[i].tags)

	2825 ERROR_EXIT(REG_ESPACE);

	2826 memcpy(initial[i].tags, p->tags, sizeof(p->tags) (j + 1));

	2827 }

	2828 initial[i].assertions = p->assertions;

	2829 i++;

	2830 }

	2831 initial[i].state = NULL;

	2832

	2833 tnfa->num_transitions = add;

	2834 tnfa->final = transitions + offs[tree->lastpos[0].position];

	2835 tnfa->num_states = parse_ctx.position;

	2836 tnfa->cflags = cflags;

	2837

	2838 tre_mem_destroy(mem);

	2839 tre_stack_destroy(stack);

	2840 xfree(counts);

	2841 xfree(offs);

	2842

	2843 preg->TRE_REGEX_T_FIELD = (void *)tnfa;

	2844 return REG_OK;

	2845

	2846 error_exit:

	2847 /* Free everything that was allocated and return the error code. */

	2848 tre_mem_destroy(mem);

	2849 if (stack != NULL)

	2850 tre_stack_destroy(stack);

	2851 if (counts != NULL)

	2852 xfree(counts);

	2853 if (offs != NULL)

	2854 xfree(offs);

	2855 preg->TRE_REGEX_T_FIELD = (void *)tnfa;

	2856 regfree(preg);

	2857 return errcode;

	2858 }

	2859

	2860

	2861

	2862

	2863 void

	2864 regfree(regex_t *preg)

	2865 {

	2866 tre_tnfa_t *tnfa;

	2867 unsigned int i;

	2868 tre_tnfa_transition_t *trans;

	2869

	2870 tnfa = (void *)preg->TRE_REGEX_T_FIELD;

	2871 if (!tnfa)

	2872 return;

	2873

	2874 for (i = 0; i < tnfa->num_transitions; i++)

	2875 if (tnfa->transitions[i].state)

	2876 {

	2877 if (tnfa->transitions[i].tags)

	2878 xfree(tnfa->transitions[i].tags);

	2879 if (tnfa->transitions[i].neg_classes)

	2880 xfree(tnfa->transitions[i].neg_classes);

	2881 }

	2882 if (tnfa->transitions)

	2883 xfree(tnfa->transitions);

	2884

	2885 if (tnfa->initial)

	2886 {

	2887 for (trans = tnfa->initial; trans->state; trans++)

	2888 {

	2889 if (trans->tags)

	2890 xfree(trans->tags);

	2891 }

	2892 xfree(tnfa->initial);

	2893 }

	2894

	2895 if (tnfa->submatch_data)

	2896 {

	2897 for (i = 0; i < tnfa->num_submatches; i++)

	2898 if (tnfa->submatch_data[i].parents)

	2899 xfree(tnfa->submatch_data[i].parents);

	2900 xfree(tnfa->submatch_data);

	2901 }

	2902

	2903 if (tnfa->tag_directions)

	2904 xfree(tnfa->tag_directions);

	2905 if (tnfa->firstpos_chars)

	2906 xfree(tnfa->firstpos_chars);

	2907 if (tnfa->minimal_tags)

	2908 xfree(tnfa->minimal_tags);

	2909 xfree(tnfa);

	2910 }

OLD	NEW

« no previous file with comments | « fusl/src/regex/glob.c ('k') | fusl/src/regex/regerror.c » ('j') | no next file with comments »