Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(105)

Unified Diff: fusl/src/regex/regcomp.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: headers too Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: fusl/src/regex/regcomp.c
diff --git a/fusl/src/regex/regcomp.c b/fusl/src/regex/regcomp.c
index da6abd18580116206c70c7a8e4a91152f2ab50e9..0e1255f707f38704f3275f4aa28dd1fbad9ed8ef 100644
--- a/fusl/src/regex/regcomp.c
+++ b/fusl/src/regex/regcomp.c
@@ -48,53 +48,45 @@ typedef struct {
int position;
int code_min;
int code_max;
- int *tags;
+ int* tags;
int assertions;
tre_ctype_t class;
- tre_ctype_t *neg_classes;
+ tre_ctype_t* neg_classes;
int backref;
} tre_pos_and_tags_t;
-
/***********************************************************************
from tre-ast.c and tre-ast.h
***********************************************************************/
/* The different AST node types. */
-typedef enum {
- LITERAL,
- CATENATION,
- ITERATION,
- UNION
-} tre_ast_type_t;
+typedef enum { LITERAL, CATENATION, ITERATION, UNION } tre_ast_type_t;
/* Special subtypes of TRE_LITERAL. */
-#define EMPTY -1 /* Empty leaf (denotes empty string). */
-#define ASSERTION -2 /* Assertion leaf. */
-#define TAG -3 /* Tag leaf. */
-#define BACKREF -4 /* Back reference leaf. */
+#define EMPTY -1 /* Empty leaf (denotes empty string). */
+#define ASSERTION -2 /* Assertion leaf. */
+#define TAG -3 /* Tag leaf. */
+#define BACKREF -4 /* Back reference leaf. */
-#define IS_SPECIAL(x) ((x)->code_min < 0)
-#define IS_EMPTY(x) ((x)->code_min == EMPTY)
+#define IS_SPECIAL(x) ((x)->code_min < 0)
+#define IS_EMPTY(x) ((x)->code_min == EMPTY)
#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
-#define IS_TAG(x) ((x)->code_min == TAG)
-#define IS_BACKREF(x) ((x)->code_min == BACKREF)
-
+#define IS_TAG(x) ((x)->code_min == TAG)
+#define IS_BACKREF(x) ((x)->code_min == BACKREF)
/* A generic AST node. All AST nodes consist of this node on the top
level with `obj' pointing to the actual content. */
typedef struct {
- tre_ast_type_t type; /* Type of the node. */
- void *obj; /* Pointer to actual node. */
+ tre_ast_type_t type; /* Type of the node. */
+ void* obj; /* Pointer to actual node. */
int nullable;
int submatch_id;
int num_submatches;
int num_tags;
- tre_pos_and_tags_t *firstpos;
- tre_pos_and_tags_t *lastpos;
+ tre_pos_and_tags_t* firstpos;
+ tre_pos_and_tags_t* lastpos;
} tre_ast_node_t;
-
/* A "literal" node. These are created for assertions, back references,
tags, matching parameter settings, and all expressions that match one
character. */
@@ -103,7 +95,7 @@ typedef struct {
long code_max;
int position;
tre_ctype_t class;
- tre_ctype_t *neg_classes;
+ tre_ctype_t* neg_classes;
} tre_literal_t;
/* A "catenation" node. These are created when two regexps are concatenated.
@@ -111,15 +103,15 @@ typedef struct {
holds all but the last, and `right' part holds the last subexpression
(catenation is left associative). */
typedef struct {
- tre_ast_node_t *left;
- tre_ast_node_t *right;
+ tre_ast_node_t* left;
+ tre_ast_node_t* right;
} tre_catenation_t;
/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
operators. */
typedef struct {
/* Subexpression to match. */
- tre_ast_node_t *arg;
+ tre_ast_node_t* arg;
/* Minimum number of consecutive matches. */
int min;
/* Maximum number of consecutive matches. */
@@ -127,100 +119,99 @@ typedef struct {
/* If 0, match as many characters as possible, if 1 match as few as
possible. Note that this does not always mean the same thing as
matching as many/few repetitions as possible. */
- unsigned int minimal:1;
+ unsigned int minimal : 1;
} tre_iteration_t;
/* An "union" node. These are created for the "|" operator. */
typedef struct {
- tre_ast_node_t *left;
- tre_ast_node_t *right;
+ tre_ast_node_t* left;
+ tre_ast_node_t* right;
} tre_union_t;
-
-static tre_ast_node_t *
-tre_ast_new_node(tre_mem_t mem, int type, void *obj)
-{
- tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
- if (!node || !obj)
- return 0;
- node->obj = obj;
- node->type = type;
- node->nullable = -1;
- node->submatch_id = -1;
- return node;
+static tre_ast_node_t* tre_ast_new_node(tre_mem_t mem, int type, void* obj) {
+ tre_ast_node_t* node = tre_mem_calloc(mem, sizeof *node);
+ if (!node || !obj)
+ return 0;
+ node->obj = obj;
+ node->type = type;
+ node->nullable = -1;
+ node->submatch_id = -1;
+ return node;
}
-static tre_ast_node_t *
-tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
-{
- tre_ast_node_t *node;
- tre_literal_t *lit;
-
- lit = tre_mem_calloc(mem, sizeof *lit);
- node = tre_ast_new_node(mem, LITERAL, lit);
- if (!node)
- return 0;
- lit->code_min = code_min;
- lit->code_max = code_max;
- lit->position = position;
- return node;
+static tre_ast_node_t* tre_ast_new_literal(tre_mem_t mem,
+ int code_min,
+ int code_max,
+ int position) {
+ tre_ast_node_t* node;
+ tre_literal_t* lit;
+
+ lit = tre_mem_calloc(mem, sizeof *lit);
+ node = tre_ast_new_node(mem, LITERAL, lit);
+ if (!node)
+ return 0;
+ lit->code_min = code_min;
+ lit->code_max = code_max;
+ lit->position = position;
+ return node;
}
-static tre_ast_node_t *
-tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
-{
- tre_ast_node_t *node;
- tre_iteration_t *iter;
-
- iter = tre_mem_calloc(mem, sizeof *iter);
- node = tre_ast_new_node(mem, ITERATION, iter);
- if (!node)
- return 0;
- iter->arg = arg;
- iter->min = min;
- iter->max = max;
- iter->minimal = minimal;
- node->num_submatches = arg->num_submatches;
- return node;
+static tre_ast_node_t* tre_ast_new_iter(tre_mem_t mem,
+ tre_ast_node_t* arg,
+ int min,
+ int max,
+ int minimal) {
+ tre_ast_node_t* node;
+ tre_iteration_t* iter;
+
+ iter = tre_mem_calloc(mem, sizeof *iter);
+ node = tre_ast_new_node(mem, ITERATION, iter);
+ if (!node)
+ return 0;
+ iter->arg = arg;
+ iter->min = min;
+ iter->max = max;
+ iter->minimal = minimal;
+ node->num_submatches = arg->num_submatches;
+ return node;
}
-static tre_ast_node_t *
-tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
-{
- tre_ast_node_t *node;
- tre_union_t *un;
-
- if (!left)
- return right;
- un = tre_mem_calloc(mem, sizeof *un);
- node = tre_ast_new_node(mem, UNION, un);
- if (!node || !right)
- return 0;
- un->left = left;
- un->right = right;
- node->num_submatches = left->num_submatches + right->num_submatches;
- return node;
+static tre_ast_node_t* tre_ast_new_union(tre_mem_t mem,
+ tre_ast_node_t* left,
+ tre_ast_node_t* right) {
+ tre_ast_node_t* node;
+ tre_union_t* un;
+
+ if (!left)
+ return right;
+ un = tre_mem_calloc(mem, sizeof *un);
+ node = tre_ast_new_node(mem, UNION, un);
+ if (!node || !right)
+ return 0;
+ un->left = left;
+ un->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+ return node;
}
-static tre_ast_node_t *
-tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
-{
- tre_ast_node_t *node;
- tre_catenation_t *cat;
-
- if (!left)
- return right;
- cat = tre_mem_calloc(mem, sizeof *cat);
- node = tre_ast_new_node(mem, CATENATION, cat);
- if (!node)
- return 0;
- cat->left = left;
- cat->right = right;
- node->num_submatches = left->num_submatches + right->num_submatches;
- return node;
+static tre_ast_node_t* tre_ast_new_catenation(tre_mem_t mem,
+ tre_ast_node_t* left,
+ tre_ast_node_t* right) {
+ tre_ast_node_t* node;
+ tre_catenation_t* cat;
+
+ if (!left)
+ return right;
+ cat = tre_mem_calloc(mem, sizeof *cat);
+ node = tre_ast_new_node(mem, CATENATION, cat);
+ if (!node)
+ return 0;
+ cat->left = left;
+ cat->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+ return node;
}
-
/***********************************************************************
from tre-stack.c and tre-stack.h
***********************************************************************/
@@ -231,61 +222,56 @@ typedef struct tre_stack_rec tre_stack_t;
is maximum size, and `increment' specifies how much more space will be
allocated with realloc() if all space gets used up. Returns the stack
object or NULL if out of memory. */
-static tre_stack_t *
-tre_stack_new(int size, int max_size, int increment);
+static tre_stack_t* tre_stack_new(int size, int max_size, int increment);
/* Frees the stack object. */
-static void
-tre_stack_destroy(tre_stack_t *s);
+static void tre_stack_destroy(tre_stack_t* s);
/* Returns the current number of objects in the stack. */
-static int
-tre_stack_num_objects(tre_stack_t *s);
+static int tre_stack_num_objects(tre_stack_t* s);
/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
`value' on top of stack `s'. Returns REG_ESPACE if out of memory.
This tries to realloc() more space before failing if maximum size
has not yet been reached. Returns REG_OK if successful. */
-#define declare_pushf(typetag, type) \
- static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
+#define declare_pushf(typetag, type) \
+ static reg_errcode_t tre_stack_push_##typetag(tre_stack_t* s, type value)
-declare_pushf(voidptr, void *);
+declare_pushf(voidptr, void*);
declare_pushf(int, int);
/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
element off of stack `s' and returns it. The stack must not be
empty. */
-#define declare_popf(typetag, type) \
- static type tre_stack_pop_ ## typetag(tre_stack_t *s)
+#define declare_popf(typetag, type) \
+ static type tre_stack_pop_##typetag(tre_stack_t* s)
-declare_popf(voidptr, void *);
+declare_popf(voidptr, void*);
declare_popf(int, int);
/* Just to save some typing. */
-#define STACK_PUSH(s, typetag, value) \
- do \
- { \
- status = tre_stack_push_ ## typetag(s, value); \
- } \
- while (/*CONSTCOND*/0)
-
-#define STACK_PUSHX(s, typetag, value) \
- { \
- status = tre_stack_push_ ## typetag(s, value); \
- if (status != REG_OK) \
- break; \
+#define STACK_PUSH(s, typetag, value) \
+ do { \
+ status = tre_stack_push_##typetag(s, value); \
+ } while (/*CONSTCOND*/ 0)
+
+#define STACK_PUSHX(s, typetag, value) \
+ { \
+ status = tre_stack_push_##typetag(s, value); \
+ if (status != REG_OK) \
+ break; \
}
-#define STACK_PUSHR(s, typetag, value) \
- { \
- reg_errcode_t _status; \
- _status = tre_stack_push_ ## typetag(s, value); \
- if (_status != REG_OK) \
- return _status; \
+#define STACK_PUSHR(s, typetag, value) \
+ { \
+ reg_errcode_t _status; \
+ _status = tre_stack_push_##typetag(s, value); \
+ if (_status != REG_OK) \
+ return _status; \
}
union tre_stack_item {
- void *voidptr_value;
+ void* voidptr_value;
int int_value;
};
@@ -294,218 +280,198 @@ struct tre_stack_rec {
int max_size;
int increment;
int ptr;
- union tre_stack_item *stack;
+ union tre_stack_item* stack;
};
-
-static tre_stack_t *
-tre_stack_new(int size, int max_size, int increment)
-{
- tre_stack_t *s;
+static tre_stack_t* tre_stack_new(int size, int max_size, int increment) {
+ tre_stack_t* s;
s = xmalloc(sizeof(*s));
- if (s != NULL)
- {
- s->stack = xmalloc(sizeof(*s->stack) * size);
- if (s->stack == NULL)
- {
- xfree(s);
- return NULL;
- }
- s->size = size;
- s->max_size = max_size;
- s->increment = increment;
- s->ptr = 0;
+ if (s != NULL) {
+ s->stack = xmalloc(sizeof(*s->stack) * size);
+ if (s->stack == NULL) {
+ xfree(s);
+ return NULL;
}
+ s->size = size;
+ s->max_size = max_size;
+ s->increment = increment;
+ s->ptr = 0;
+ }
return s;
}
-static void
-tre_stack_destroy(tre_stack_t *s)
-{
+static void tre_stack_destroy(tre_stack_t* s) {
xfree(s->stack);
xfree(s);
}
-static int
-tre_stack_num_objects(tre_stack_t *s)
-{
+static int tre_stack_num_objects(tre_stack_t* s) {
return s->ptr;
}
-static reg_errcode_t
-tre_stack_push(tre_stack_t *s, union tre_stack_item value)
-{
- if (s->ptr < s->size)
- {
- s->stack[s->ptr] = value;
- s->ptr++;
- }
- else
- {
- if (s->size >= s->max_size)
- {
- return REG_ESPACE;
- }
- else
- {
- union tre_stack_item *new_buffer;
- int new_size;
- new_size = s->size + s->increment;
- if (new_size > s->max_size)
- new_size = s->max_size;
- new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
- if (new_buffer == NULL)
- {
- return REG_ESPACE;
- }
- assert(new_size > s->size);
- s->size = new_size;
- s->stack = new_buffer;
- tre_stack_push(s, value);
- }
+static reg_errcode_t tre_stack_push(tre_stack_t* s,
+ union tre_stack_item value) {
+ if (s->ptr < s->size) {
+ s->stack[s->ptr] = value;
+ s->ptr++;
+ } else {
+ if (s->size >= s->max_size) {
+ return REG_ESPACE;
+ } else {
+ union tre_stack_item* new_buffer;
+ int new_size;
+ new_size = s->size + s->increment;
+ if (new_size > s->max_size)
+ new_size = s->max_size;
+ new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
+ if (new_buffer == NULL) {
+ return REG_ESPACE;
+ }
+ assert(new_size > s->size);
+ s->size = new_size;
+ s->stack = new_buffer;
+ tre_stack_push(s, value);
}
+ }
return REG_OK;
}
-#define define_pushf(typetag, type) \
- declare_pushf(typetag, type) { \
- union tre_stack_item item; \
- item.typetag ## _value = value; \
- return tre_stack_push(s, item); \
-}
-
-define_pushf(int, int)
-define_pushf(voidptr, void *)
-
-#define define_popf(typetag, type) \
- declare_popf(typetag, type) { \
- return s->stack[--s->ptr].typetag ## _value; \
+#define define_pushf(typetag, type) \
+ declare_pushf(typetag, type) { \
+ union tre_stack_item item; \
+ item.typetag##_value = value; \
+ return tre_stack_push(s, item); \
}
-define_popf(int, int)
-define_popf(voidptr, void *)
+define_pushf(int, int) define_pushf(voidptr, void*)
+#define define_popf(typetag, type) \
+ declare_popf(typetag, type) { return s->stack[--s->ptr].typetag##_value; }
+ define_popf(int, int) define_popf(voidptr, void*)
-/***********************************************************************
- from tre-parse.c and tre-parse.h
-***********************************************************************/
+ /***********************************************************************
+ from tre-parse.c and tre-parse.h
+ ***********************************************************************/
-/* Parse context. */
-typedef struct {
- /* Memory allocator. The AST is allocated using this. */
- tre_mem_t mem;
- /* Stack used for keeping track of regexp syntax. */
- tre_stack_t *stack;
- /* The parsed node after a parse function returns. */
- tre_ast_node_t *n;
- /* Position in the regexp pattern after a parse function returns. */
- const char *s;
- /* The first character of the regexp. */
- const char *re;
- /* Current submatch ID. */
- int submatch_id;
- /* Current position (number of literal). */
- int position;
- /* The highest back reference or -1 if none seen so far. */
- int max_backref;
- /* Compilation flags. */
- int cflags;
+ /* Parse context. */
+ typedef struct {
+ /* Memory allocator. The AST is allocated using this. */
+ tre_mem_t mem;
+ /* Stack used for keeping track of regexp syntax. */
+ tre_stack_t* stack;
+ /* The parsed node after a parse function returns. */
+ tre_ast_node_t* n;
+ /* Position in the regexp pattern after a parse function returns. */
+ const char* s;
+ /* The first character of the regexp. */
+ const char* re;
+ /* Current submatch ID. */
+ int submatch_id;
+ /* Current position (number of literal). */
+ int position;
+ /* The highest back reference or -1 if none seen so far. */
+ int max_backref;
+ /* Compilation flags. */
+ int cflags;
} tre_parse_ctx_t;
/* Some macros for expanding \w, \s, etc. */
static const struct {
- char c;
- const char *expansion;
-} tre_macros[] = {
- {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
- {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
- {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
- {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
- { 0, 0 }
-};
+ char c;
+ const char* expansion;
+} tre_macros[] = {{'t', "\t"},
+ {'n', "\n"},
+ {'r', "\r"},
+ {'f', "\f"},
+ {'a', "\a"},
+ {'e', "\033"},
+ {'w', "[[:alnum:]_]"},
+ {'W', "[^[:alnum:]_]"},
+ {'s', "[[:space:]]"},
+ {'S', "[^[:space:]]"},
+ {'d', "[[:digit:]]"},
+ {'D', "[^[:digit:]]"},
+ {0, 0}};
/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
must have at least `len' items. Sets buf[0] to zero if the there
is no match in `tre_macros'. */
-static const char *tre_expand_macro(const char *s)
-{
- int i;
- for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
- return tre_macros[i].expansion;
+static const char* tre_expand_macro(const char* s) {
+ int i;
+ for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++)
+ ;
+ return tre_macros[i].expansion;
}
-static int
-tre_compare_lit(const void *a, const void *b)
-{
- const tre_literal_t *const *la = a;
- const tre_literal_t *const *lb = b;
- /* assumes the range of valid code_min is < INT_MAX */
- return la[0]->code_min - lb[0]->code_min;
+static int tre_compare_lit(const void* a, const void* b) {
+ const tre_literal_t* const* la = a;
+ const tre_literal_t* const* lb = b;
+ /* assumes the range of valid code_min is < INT_MAX */
+ return la[0]->code_min - lb[0]->code_min;
}
struct literals {
- tre_mem_t mem;
- tre_literal_t **a;
- int len;
- int cap;
+ tre_mem_t mem;
+ tre_literal_t** a;
+ int len;
+ int cap;
};
-static tre_literal_t *tre_new_lit(struct literals *p)
-{
- tre_literal_t **a;
- if (p->len >= p->cap) {
- if (p->cap >= 1<<15)
- return 0;
- p->cap *= 2;
- a = xrealloc(p->a, p->cap * sizeof *p->a);
- if (!a)
- return 0;
- p->a = a;
- }
- a = p->a + p->len++;
- *a = tre_mem_calloc(p->mem, sizeof **a);
- return *a;
+static tre_literal_t* tre_new_lit(struct literals* p) {
+ tre_literal_t** a;
+ if (p->len >= p->cap) {
+ if (p->cap >= 1 << 15)
+ return 0;
+ p->cap *= 2;
+ a = xrealloc(p->a, p->cap * sizeof *p->a);
+ if (!a)
+ return 0;
+ p->a = a;
+ }
+ a = p->a + p->len++;
+ *a = tre_mem_calloc(p->mem, sizeof **a);
+ return *a;
}
-static int add_icase_literals(struct literals *ls, int min, int max)
-{
- tre_literal_t *lit;
- int b, e, c;
- for (c=min; c<=max; ) {
- /* assumes islower(c) and isupper(c) are exclusive
- and toupper(c)!=c if islower(c).
- multiple opposite case characters are not supported */
- if (tre_islower(c)) {
- b = e = tre_toupper(c);
- for (c++, e++; c<=max; c++, e++)
- if (tre_toupper(c) != e) break;
- } else if (tre_isupper(c)) {
- b = e = tre_tolower(c);
- for (c++, e++; c<=max; c++, e++)
- if (tre_tolower(c) != e) break;
- } else {
- c++;
- continue;
- }
- lit = tre_new_lit(ls);
- if (!lit)
- return -1;
- lit->code_min = b;
- lit->code_max = e-1;
- lit->position = -1;
- }
- return 0;
+static int add_icase_literals(struct literals* ls, int min, int max) {
+ tre_literal_t* lit;
+ int b, e, c;
+ for (c = min; c <= max;) {
+ /* assumes islower(c) and isupper(c) are exclusive
+ and toupper(c)!=c if islower(c).
+ multiple opposite case characters are not supported */
+ if (tre_islower(c)) {
+ b = e = tre_toupper(c);
+ for (c++, e++; c <= max; c++, e++)
+ if (tre_toupper(c) != e)
+ break;
+ } else if (tre_isupper(c)) {
+ b = e = tre_tolower(c);
+ for (c++, e++; c <= max; c++, e++)
+ if (tre_tolower(c) != e)
+ break;
+ } else {
+ c++;
+ continue;
+ }
+ lit = tre_new_lit(ls);
+ if (!lit)
+ return -1;
+ lit->code_min = b;
+ lit->code_max = e - 1;
+ lit->position = -1;
+ }
+ return 0;
}
-
/* Maximum number of character classes in a negated bracket expression. */
#define MAX_NEG_CLASSES 64
struct neg {
- int negate;
- int len;
- tre_ctype_t a[MAX_NEG_CLASSES];
+ int negate;
+ int len;
+ tre_ctype_t a[MAX_NEG_CLASSES];
};
// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
@@ -528,548 +494,549 @@ coll_single is a single char collating element but it can be
'^' anywhere except after the openning '['
*/
-static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
-{
- const char *start = s;
- tre_ctype_t class;
- int min, max;
- wchar_t wc;
- int len;
-
- for (;;) {
- class = 0;
- len = mbtowc(&wc, s, -1);
- if (len <= 0)
- return *s ? REG_BADPAT : REG_EBRACK;
- if (*s == ']' && s != start) {
- ctx->s = s+1;
- return REG_OK;
- }
- if (*s == '-' && s != start && s[1] != ']' &&
- /* extension: [a-z--@] is accepted as [a-z]|[--@] */
- (s[1] != '-' || s[2] == ']'))
- return REG_ERANGE;
- if (*s == '[' && (s[1] == '.' || s[1] == '='))
- /* collating symbols and equivalence classes are not supported */
- return REG_ECOLLATE;
- if (*s == '[' && s[1] == ':') {
- char tmp[CHARCLASS_NAME_MAX+1];
- s += 2;
- for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
- if (s[len] == ':') {
- memcpy(tmp, s, len);
- tmp[len] = 0;
- class = tre_ctype(tmp);
- break;
- }
- }
- if (!class || s[len+1] != ']')
- return REG_ECTYPE;
- min = 0;
- max = TRE_CHAR_MAX;
- s += len+2;
- } else {
- min = max = wc;
- s += len;
- if (*s == '-' && s[1] != ']') {
- s++;
- len = mbtowc(&wc, s, -1);
- max = wc;
- /* XXX - Should use collation order instead of
- encoding values in character ranges. */
- if (len <= 0 || min > max)
- return REG_ERANGE;
- s += len;
- }
- }
-
- if (class && neg->negate) {
- if (neg->len >= MAX_NEG_CLASSES)
- return REG_ESPACE;
- neg->a[neg->len++] = class;
- } else {
- tre_literal_t *lit = tre_new_lit(ls);
- if (!lit)
- return REG_ESPACE;
- lit->code_min = min;
- lit->code_max = max;
- lit->class = class;
- lit->position = -1;
-
- /* Add opposite-case codepoints if REG_ICASE is present.
- It seems that POSIX requires that bracket negation
- should happen before case-folding, but most practical
- implementations do it the other way around. Changing
- the order would need efficient representation of
- case-fold ranges and bracket range sets even with
- simple patterns so this is ok for now. */
- if (ctx->cflags & REG_ICASE && !class)
- if (add_icase_literals(ls, min, max))
- return REG_ESPACE;
- }
- }
+static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t* ctx,
+ const char* s,
+ struct literals* ls,
+ struct neg* neg) {
+ const char* start = s;
+ tre_ctype_t class;
+ int min, max;
+ wchar_t wc;
+ int len;
+
+ for (;;) {
+ class = 0;
+ len = mbtowc(&wc, s, -1);
+ if (len <= 0)
+ return *s ? REG_BADPAT : REG_EBRACK;
+ if (*s == ']' && s != start) {
+ ctx->s = s + 1;
+ return REG_OK;
+ }
+ if (*s == '-' && s != start && s[1] != ']' &&
+ /* extension: [a-z--@] is accepted as [a-z]|[--@] */
+ (s[1] != '-' || s[2] == ']'))
+ return REG_ERANGE;
+ if (*s == '[' && (s[1] == '.' || s[1] == '='))
+ /* collating symbols and equivalence classes are not supported */
+ return REG_ECOLLATE;
+ if (*s == '[' && s[1] == ':') {
+ char tmp[CHARCLASS_NAME_MAX + 1];
+ s += 2;
+ for (len = 0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
+ if (s[len] == ':') {
+ memcpy(tmp, s, len);
+ tmp[len] = 0;
+ class = tre_ctype(tmp);
+ break;
+ }
+ }
+ if (!class || s[len + 1] != ']')
+ return REG_ECTYPE;
+ min = 0;
+ max = TRE_CHAR_MAX;
+ s += len + 2;
+ } else {
+ min = max = wc;
+ s += len;
+ if (*s == '-' && s[1] != ']') {
+ s++;
+ len = mbtowc(&wc, s, -1);
+ max = wc;
+ /* XXX - Should use collation order instead of
+ encoding values in character ranges. */
+ if (len <= 0 || min > max)
+ return REG_ERANGE;
+ s += len;
+ }
+ }
+
+ if (class && neg->negate) {
+ if (neg->len >= MAX_NEG_CLASSES)
+ return REG_ESPACE;
+ neg->a[neg->len++] = class;
+ } else {
+ tre_literal_t* lit = tre_new_lit(ls);
+ if (!lit)
+ return REG_ESPACE;
+ lit->code_min = min;
+ lit->code_max = max;
+ lit->class = class;
+ lit->position = -1;
+
+ /* Add opposite-case codepoints if REG_ICASE is present.
+ It seems that POSIX requires that bracket negation
+ should happen before case-folding, but most practical
+ implementations do it the other way around. Changing
+ the order would need efficient representation of
+ case-fold ranges and bracket range sets even with
+ simple patterns so this is ok for now. */
+ if (ctx->cflags & REG_ICASE && !class)
+ if (add_icase_literals(ls, min, max))
+ return REG_ESPACE;
+ }
+ }
}
-static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
-{
- int i, max, min, negmax, negmin;
- tre_ast_node_t *node = 0, *n;
- tre_ctype_t *nc = 0;
- tre_literal_t *lit;
- struct literals ls;
- struct neg neg;
- reg_errcode_t err;
-
- ls.mem = ctx->mem;
- ls.len = 0;
- ls.cap = 32;
- ls.a = xmalloc(ls.cap * sizeof *ls.a);
- if (!ls.a)
- return REG_ESPACE;
- neg.len = 0;
- neg.negate = *s == '^';
- if (neg.negate)
- s++;
-
- err = parse_bracket_terms(ctx, s, &ls, &neg);
- if (err != REG_OK)
- goto parse_bracket_done;
-
- if (neg.negate) {
- /* Sort the array if we need to negate it. */
- qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
- /* extra lit for the last negated range */
- lit = tre_new_lit(&ls);
- if (!lit) {
- err = REG_ESPACE;
- goto parse_bracket_done;
- }
- lit->code_min = TRE_CHAR_MAX+1;
- lit->code_max = TRE_CHAR_MAX+1;
- lit->position = -1;
- /* negated classes */
- if (neg.len) {
- nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
- if (!nc) {
- err = REG_ESPACE;
- goto parse_bracket_done;
- }
- memcpy(nc, neg.a, neg.len*sizeof *neg.a);
- nc[neg.len] = 0;
- }
- }
-
- /* Build a union of the items in the array, negated if necessary. */
- negmax = negmin = 0;
- for (i = 0; i < ls.len; i++) {
- lit = ls.a[i];
- min = lit->code_min;
- max = lit->code_max;
- if (neg.negate) {
- if (min <= negmin) {
- /* Overlap. */
- negmin = MAX(max + 1, negmin);
- continue;
- }
- negmax = min - 1;
- lit->code_min = negmin;
- lit->code_max = negmax;
- negmin = max + 1;
- }
- lit->position = ctx->position;
- lit->neg_classes = nc;
- n = tre_ast_new_node(ctx->mem, LITERAL, lit);
- node = tre_ast_new_union(ctx->mem, node, n);
- if (!node) {
- err = REG_ESPACE;
- break;
- }
- }
+static reg_errcode_t parse_bracket(tre_parse_ctx_t* ctx, const char* s) {
+ int i, max, min, negmax, negmin;
+ tre_ast_node_t *node = 0, *n;
+ tre_ctype_t* nc = 0;
+ tre_literal_t* lit;
+ struct literals ls;
+ struct neg neg;
+ reg_errcode_t err;
+
+ ls.mem = ctx->mem;
+ ls.len = 0;
+ ls.cap = 32;
+ ls.a = xmalloc(ls.cap * sizeof *ls.a);
+ if (!ls.a)
+ return REG_ESPACE;
+ neg.len = 0;
+ neg.negate = *s == '^';
+ if (neg.negate)
+ s++;
+
+ err = parse_bracket_terms(ctx, s, &ls, &neg);
+ if (err != REG_OK)
+ goto parse_bracket_done;
+
+ if (neg.negate) {
+ /* Sort the array if we need to negate it. */
+ qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
+ /* extra lit for the last negated range */
+ lit = tre_new_lit(&ls);
+ if (!lit) {
+ err = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ lit->code_min = TRE_CHAR_MAX + 1;
+ lit->code_max = TRE_CHAR_MAX + 1;
+ lit->position = -1;
+ /* negated classes */
+ if (neg.len) {
+ nc = tre_mem_alloc(ctx->mem, (neg.len + 1) * sizeof *neg.a);
+ if (!nc) {
+ err = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ memcpy(nc, neg.a, neg.len * sizeof *neg.a);
+ nc[neg.len] = 0;
+ }
+ }
+
+ /* Build a union of the items in the array, negated if necessary. */
+ negmax = negmin = 0;
+ for (i = 0; i < ls.len; i++) {
+ lit = ls.a[i];
+ min = lit->code_min;
+ max = lit->code_max;
+ if (neg.negate) {
+ if (min <= negmin) {
+ /* Overlap. */
+ negmin = MAX(max + 1, negmin);
+ continue;
+ }
+ negmax = min - 1;
+ lit->code_min = negmin;
+ lit->code_max = negmax;
+ negmin = max + 1;
+ }
+ lit->position = ctx->position;
+ lit->neg_classes = nc;
+ n = tre_ast_new_node(ctx->mem, LITERAL, lit);
+ node = tre_ast_new_union(ctx->mem, node, n);
+ if (!node) {
+ err = REG_ESPACE;
+ break;
+ }
+ }
parse_bracket_done:
- xfree(ls.a);
- ctx->position++;
- ctx->n = node;
- return err;
+ xfree(ls.a);
+ ctx->position++;
+ ctx->n = node;
+ return err;
}
-static const char *parse_dup_count(const char *s, int *n)
-{
- *n = -1;
- if (!isdigit(*s))
- return s;
- *n = 0;
- for (;;) {
- *n = 10 * *n + (*s - '0');
- s++;
- if (!isdigit(*s) || *n > RE_DUP_MAX)
- break;
- }
- return s;
+static const char* parse_dup_count(const char* s, int* n) {
+ *n = -1;
+ if (!isdigit(*s))
+ return s;
+ *n = 0;
+ for (;;) {
+ *n = 10 * *n + (*s - '0');
+ s++;
+ if (!isdigit(*s) || *n > RE_DUP_MAX)
+ break;
+ }
+ return s;
}
-static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
-{
- int min, max;
-
- s = parse_dup_count(s, &min);
- if (*s == ',')
- s = parse_dup_count(s+1, &max);
- else
- max = min;
-
- if (
- (max < min && max >= 0) ||
- max > RE_DUP_MAX ||
- min > RE_DUP_MAX ||
- min < 0 ||
- (!ere && *s++ != '\\') ||
- *s++ != '}'
- )
- return 0;
- *pmin = min;
- *pmax = max;
- return s;
+static const char* parse_dup(const char* s, int ere, int* pmin, int* pmax) {
+ int min, max;
+
+ s = parse_dup_count(s, &min);
+ if (*s == ',')
+ s = parse_dup_count(s + 1, &max);
+ else
+ max = min;
+
+ if ((max < min && max >= 0) || max > RE_DUP_MAX || min > RE_DUP_MAX ||
+ min < 0 || (!ere && *s++ != '\\') || *s++ != '}')
+ return 0;
+ *pmin = min;
+ *pmax = max;
+ return s;
}
-static int hexval(unsigned c)
-{
- if (c-'0'<10) return c-'0';
- c |= 32;
- if (c-'a'<6) return c-'a'+10;
- return -1;
+static int hexval(unsigned c) {
+ if (c - '0' < 10)
+ return c - '0';
+ c |= 32;
+ if (c - 'a' < 6)
+ return c - 'a' + 10;
+ return -1;
}
-static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
-{
- if (node->submatch_id >= 0) {
- tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
- if (!n)
- return REG_ESPACE;
- n = tre_ast_new_catenation(ctx->mem, n, node);
- if (!n)
- return REG_ESPACE;
- n->num_submatches = node->num_submatches;
- node = n;
- }
- node->submatch_id = subid;
- node->num_submatches++;
- ctx->n = node;
- return REG_OK;
+static reg_errcode_t marksub(tre_parse_ctx_t* ctx,
+ tre_ast_node_t* node,
+ int subid) {
+ if (node->submatch_id >= 0) {
+ tre_ast_node_t* n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (!n)
+ return REG_ESPACE;
+ n = tre_ast_new_catenation(ctx->mem, n, node);
+ if (!n)
+ return REG_ESPACE;
+ n->num_submatches = node->num_submatches;
+ node = n;
+ }
+ node->submatch_id = subid;
+ node->num_submatches++;
+ ctx->n = node;
+ return REG_OK;
}
/*
BRE grammar:
-Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
+Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^'
+Branch '$'
Branch = Atom | Branch Atom
-Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
-Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
+Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch
+'\)' | back_ref
+Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count
+'\}'
(leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
ERE grammar:
Regex = Branch | Regex '|' Branch
Branch = Atom | Branch Atom
-Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
-Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
+Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex
+')' | '^' | '$'
+Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count
+',' Count '}'
(a*+?, ^*, $+, \X, {, (|a) are unspecified)
*/
-static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
-{
- int len, ere = ctx->cflags & REG_EXTENDED;
- const char *p;
- tre_ast_node_t *node;
- wchar_t wc;
- switch (*s) {
- case '[':
- return parse_bracket(ctx, s+1);
- case '\\':
- p = tre_expand_macro(s+1);
- if (p) {
- /* assume \X expansion is a single atom */
- reg_errcode_t err = parse_atom(ctx, p);
- ctx->s = s+2;
- return err;
- }
- /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
- switch (*++s) {
- case 0:
- return REG_EESCAPE;
- case 'b':
- node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
- break;
- case 'B':
- node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
- break;
- case '<':
- node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
- break;
- case '>':
- node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
- break;
- case 'x':
- s++;
- int i, v = 0, c;
- len = 2;
- if (*s == '{') {
- len = 8;
- s++;
- }
- for (i=0; i<len && v<0x110000; i++) {
- c = hexval(s[i]);
- if (c < 0) break;
- v = 16*v + c;
- }
- s += i;
- if (len == 8) {
- if (*s != '}')
- return REG_EBRACE;
- s++;
- }
- node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
- s--;
- break;
- case '{':
- case '+':
- case '?':
- /* extension: treat \+, \? as repetitions in BRE */
- /* reject repetitions after empty expression in BRE */
- if (!ere)
- return REG_BADRPT;
- case '|':
- /* extension: treat \| as alternation in BRE */
- if (!ere) {
- node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
- s--;
- goto end;
- }
- /* fallthrough */
- default:
- if (!ere && (unsigned)*s-'1' < 9) {
- /* back reference */
- int val = *s - '0';
- node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
- ctx->max_backref = MAX(val, ctx->max_backref);
- } else {
- /* extension: accept unknown escaped char
- as a literal */
- goto parse_literal;
- }
- }
- s++;
- break;
- case '.':
- if (ctx->cflags & REG_NEWLINE) {
- tre_ast_node_t *tmp1, *tmp2;
- tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
- tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
- if (tmp1 && tmp2)
- node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
- else
- node = 0;
- } else {
- node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
- }
- s++;
- break;
- case '^':
- /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
- if (!ere && s != ctx->re)
- goto parse_literal;
- node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
- s++;
- break;
- case '$':
- /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
- if (!ere && s[1])
- goto parse_literal;
- node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
- s++;
- break;
- case '*':
- return REG_BADPAT;
- case '{':
- case '+':
- case '?':
- /* reject repetitions after empty expression in ERE */
- if (ere)
- return REG_BADRPT;
- case '|':
- if (!ere)
- goto parse_literal;
- case 0:
- node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
- break;
- default:
-parse_literal:
- len = mbtowc(&wc, s, -1);
- if (len < 0)
- return REG_BADPAT;
- if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
- tre_ast_node_t *tmp1, *tmp2;
- /* multiple opposite case characters are not supported */
- tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
- tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
- if (tmp1 && tmp2)
- node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
- else
- node = 0;
- } else {
- node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
- }
- ctx->position++;
- s += len;
- break;
- }
+static reg_errcode_t parse_atom(tre_parse_ctx_t* ctx, const char* s) {
+ int len, ere = ctx->cflags & REG_EXTENDED;
+ const char* p;
+ tre_ast_node_t* node;
+ wchar_t wc;
+ switch (*s) {
+ case '[':
+ return parse_bracket(ctx, s + 1);
+ case '\\':
+ p = tre_expand_macro(s + 1);
+ if (p) {
+ /* assume \X expansion is a single atom */
+ reg_errcode_t err = parse_atom(ctx, p);
+ ctx->s = s + 2;
+ return err;
+ }
+ /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
+ switch (*++s) {
+ case 0:
+ return REG_EESCAPE;
+ case 'b':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
+ break;
+ case 'B':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
+ break;
+ case '<':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
+ break;
+ case '>':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
+ break;
+ case 'x':
+ s++;
+ int i, v = 0, c;
+ len = 2;
+ if (*s == '{') {
+ len = 8;
+ s++;
+ }
+ for (i = 0; i < len && v < 0x110000; i++) {
+ c = hexval(s[i]);
+ if (c < 0)
+ break;
+ v = 16 * v + c;
+ }
+ s += i;
+ if (len == 8) {
+ if (*s != '}')
+ return REG_EBRACE;
+ s++;
+ }
+ node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
+ s--;
+ break;
+ case '{':
+ case '+':
+ case '?':
+ /* extension: treat \+, \? as repetitions in BRE */
+ /* reject repetitions after empty expression in BRE */
+ if (!ere)
+ return REG_BADRPT;
+ case '|':
+ /* extension: treat \| as alternation in BRE */
+ if (!ere) {
+ node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ s--;
+ goto end;
+ }
+ /* fallthrough */
+ default:
+ if (!ere && (unsigned)*s - '1' < 9) {
+ /* back reference */
+ int val = *s - '0';
+ node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
+ ctx->max_backref = MAX(val, ctx->max_backref);
+ } else {
+ /* extension: accept unknown escaped char
+ as a literal */
+ goto parse_literal;
+ }
+ }
+ s++;
+ break;
+ case '.':
+ if (ctx->cflags & REG_NEWLINE) {
+ tre_ast_node_t *tmp1, *tmp2;
+ tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n' - 1, ctx->position++);
+ tmp2 = tre_ast_new_literal(ctx->mem, '\n' + 1, TRE_CHAR_MAX,
+ ctx->position++);
+ if (tmp1 && tmp2)
+ node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+ else
+ node = 0;
+ } else {
+ node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
+ }
+ s++;
+ break;
+ case '^':
+ /* '^' has a special meaning everywhere in EREs, and at beginning of BRE.
+ */
+ if (!ere && s != ctx->re)
+ goto parse_literal;
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
+ s++;
+ break;
+ case '$':
+ /* '$' is special everywhere in EREs, and in the end of the string in
+ * BREs. */
+ if (!ere && s[1])
+ goto parse_literal;
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
+ s++;
+ break;
+ case '*':
+ return REG_BADPAT;
+ case '{':
+ case '+':
+ case '?':
+ /* reject repetitions after empty expression in ERE */
+ if (ere)
+ return REG_BADRPT;
+ case '|':
+ if (!ere)
+ goto parse_literal;
+ case 0:
+ node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ break;
+ default:
+ parse_literal:
+ len = mbtowc(&wc, s, -1);
+ if (len < 0)
+ return REG_BADPAT;
+ if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
+ tre_ast_node_t *tmp1, *tmp2;
+ /* multiple opposite case characters are not supported */
+ tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc),
+ ctx->position);
+ tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc),
+ ctx->position);
+ if (tmp1 && tmp2)
+ node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+ else
+ node = 0;
+ } else {
+ node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
+ }
+ ctx->position++;
+ s += len;
+ break;
+ }
end:
- if (!node)
- return REG_ESPACE;
- ctx->n = node;
- ctx->s = s;
- return REG_OK;
+ if (!node)
+ return REG_ESPACE;
+ ctx->n = node;
+ ctx->s = s;
+ return REG_OK;
}
-#define PUSHPTR(err, s, v) do { \
- if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
- return err; \
-} while(0)
-
-#define PUSHINT(err, s, v) do { \
- if ((err = tre_stack_push_int(s, v)) != REG_OK) \
- return err; \
-} while(0)
-
-static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
-{
- tre_ast_node_t *nbranch=0, *nunion=0;
- int ere = ctx->cflags & REG_EXTENDED;
- const char *s = ctx->re;
- int subid = 0;
- int depth = 0;
- reg_errcode_t err;
- tre_stack_t *stack = ctx->stack;
-
- PUSHINT(err, stack, subid++);
- for (;;) {
- if ((!ere && *s == '\\' && s[1] == '(') ||
- (ere && *s == '(')) {
- PUSHPTR(err, stack, nunion);
- PUSHPTR(err, stack, nbranch);
- PUSHINT(err, stack, subid++);
- s++;
- if (!ere)
- s++;
- depth++;
- nbranch = nunion = 0;
- continue;
- }
- if ((!ere && *s == '\\' && s[1] == ')') ||
- (ere && *s == ')' && depth)) {
- ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
- if (!ctx->n)
- return REG_ESPACE;
- } else {
- err = parse_atom(ctx, s);
- if (err != REG_OK)
- return err;
- s = ctx->s;
- }
-
- parse_iter:
- /* extension: repetitions are rejected after an empty node
- eg. (+), |*, {2}, but assertions are not treated as empty
- so ^* or $? are accepted currently. */
- for (;;) {
- int min, max;
-
- if (*s!='\\' && *s!='*') {
- if (!ere)
- break;
- if (*s!='+' && *s!='?' && *s!='{')
- break;
- }
- if (*s=='\\' && ere)
- break;
- /* extension: treat \+, \? as repetitions in BRE */
- if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
- break;
- if (*s=='\\')
- s++;
-
- /* extension: multiple consecutive *+?{,} is unspecified,
- but (a+)+ has to be supported so accepting a++ makes
- sense, note however that the RE_DUP_MAX limit can be
- circumvented: (a{255}){255} uses a lot of memory.. */
- if (*s=='{') {
- s = parse_dup(s+1, ere, &min, &max);
- if (!s)
- return REG_BADBR;
- } else {
- min=0;
- max=-1;
- if (*s == '+')
- min = 1;
- if (*s == '?')
- max = 1;
- s++;
- }
- if (max == 0)
- ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
- else
- ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
- if (!ctx->n)
- return REG_ESPACE;
- }
-
- nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
- if ((ere && *s == '|') ||
- (ere && *s == ')' && depth) ||
- (!ere && *s == '\\' && s[1] == ')') ||
- /* extension: treat \| as alternation in BRE */
- (!ere && *s == '\\' && s[1] == '|') ||
- !*s) {
- /* extension: empty branch is unspecified (), (|a), (a|)
- here they are not rejected but match on empty string */
- int c = *s;
- nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
- nbranch = 0;
-
- if (c == '\\' && s[1] == '|') {
- s+=2;
- } else if (c == '|') {
- s++;
- } else {
- if (c == '\\') {
- if (!depth) return REG_EPAREN;
- s+=2;
- } else if (c == ')')
- s++;
- depth--;
- err = marksub(ctx, nunion, tre_stack_pop_int(stack));
- if (err != REG_OK)
- return err;
- if (!c && depth<0) {
- ctx->submatch_id = subid;
- return REG_OK;
- }
- if (!c || depth<0)
- return REG_EPAREN;
- nbranch = tre_stack_pop_voidptr(stack);
- nunion = tre_stack_pop_voidptr(stack);
- goto parse_iter;
- }
- }
- }
-}
+#define PUSHPTR(err, s, v) \
+ do { \
+ if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
+ return err; \
+ } while (0)
+
+#define PUSHINT(err, s, v) \
+ do { \
+ if ((err = tre_stack_push_int(s, v)) != REG_OK) \
+ return err; \
+ } while (0)
+
+static reg_errcode_t tre_parse(tre_parse_ctx_t* ctx) {
+ tre_ast_node_t *nbranch = 0, *nunion = 0;
+ int ere = ctx->cflags & REG_EXTENDED;
+ const char* s = ctx->re;
+ int subid = 0;
+ int depth = 0;
+ reg_errcode_t err;
+ tre_stack_t* stack = ctx->stack;
+
+ PUSHINT(err, stack, subid++);
+ for (;;) {
+ if ((!ere && *s == '\\' && s[1] == '(') || (ere && *s == '(')) {
+ PUSHPTR(err, stack, nunion);
+ PUSHPTR(err, stack, nbranch);
+ PUSHINT(err, stack, subid++);
+ s++;
+ if (!ere)
+ s++;
+ depth++;
+ nbranch = nunion = 0;
+ continue;
+ }
+ if ((!ere && *s == '\\' && s[1] == ')') || (ere && *s == ')' && depth)) {
+ ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (!ctx->n)
+ return REG_ESPACE;
+ } else {
+ err = parse_atom(ctx, s);
+ if (err != REG_OK)
+ return err;
+ s = ctx->s;
+ }
+
+ parse_iter:
+ /* extension: repetitions are rejected after an empty node
+ eg. (+), |*, {2}, but assertions are not treated as empty
+ so ^* or $? are accepted currently. */
+ for (;;) {
+ int min, max;
+
+ if (*s != '\\' && *s != '*') {
+ if (!ere)
+ break;
+ if (*s != '+' && *s != '?' && *s != '{')
+ break;
+ }
+ if (*s == '\\' && ere)
+ break;
+ /* extension: treat \+, \? as repetitions in BRE */
+ if (*s == '\\' && s[1] != '+' && s[1] != '?' && s[1] != '{')
+ break;
+ if (*s == '\\')
+ s++;
+
+ /* extension: multiple consecutive *+?{,} is unspecified,
+ but (a+)+ has to be supported so accepting a++ makes
+ sense, note however that the RE_DUP_MAX limit can be
+ circumvented: (a{255}){255} uses a lot of memory.. */
+ if (*s == '{') {
+ s = parse_dup(s + 1, ere, &min, &max);
+ if (!s)
+ return REG_BADBR;
+ } else {
+ min = 0;
+ max = -1;
+ if (*s == '+')
+ min = 1;
+ if (*s == '?')
+ max = 1;
+ s++;
+ }
+ if (max == 0)
+ ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ else
+ ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
+ if (!ctx->n)
+ return REG_ESPACE;
+ }
+ nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
+ if ((ere && *s == '|') || (ere && *s == ')' && depth) ||
+ (!ere && *s == '\\' && s[1] == ')') ||
+ /* extension: treat \| as alternation in BRE */
+ (!ere && *s == '\\' && s[1] == '|') || !*s) {
+ /* extension: empty branch is unspecified (), (|a), (a|)
+ here they are not rejected but match on empty string */
+ int c = *s;
+ nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
+ nbranch = 0;
+
+ if (c == '\\' && s[1] == '|') {
+ s += 2;
+ } else if (c == '|') {
+ s++;
+ } else {
+ if (c == '\\') {
+ if (!depth)
+ return REG_EPAREN;
+ s += 2;
+ } else if (c == ')')
+ s++;
+ depth--;
+ err = marksub(ctx, nunion, tre_stack_pop_int(stack));
+ if (err != REG_OK)
+ return err;
+ if (!c && depth < 0) {
+ ctx->submatch_id = subid;
+ return REG_OK;
+ }
+ if (!c || depth < 0)
+ return REG_EPAREN;
+ nbranch = tre_stack_pop_voidptr(stack);
+ nunion = tre_stack_pop_voidptr(stack);
+ goto parse_iter;
+ }
+ }
+ }
+}
/***********************************************************************
from tre-compile.c
***********************************************************************/
-
/*
TODO:
- Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
@@ -1080,14 +1047,13 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
Algorithms to setup tags so that submatch addressing can be done.
*/
-
/* Inserts a catenation node to the root of the tree given in `node'.
As the left child a new tag with number `tag_id' to `node' is added,
and the right child is the old root. */
-static reg_errcode_t
-tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
-{
- tre_catenation_t *c;
+static reg_errcode_t tre_add_tag_left(tre_mem_t mem,
+ tre_ast_node_t* node,
+ int tag_id) {
+ tre_catenation_t* c;
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
@@ -1114,10 +1080,10 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
/* Inserts a catenation node to the root of the tree given in `node'.
As the right child a new tag with number `tag_id' to `node' is added,
and the left child is the old root. */
-static reg_errcode_t
-tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
-{
- tre_catenation_t *c;
+static reg_errcode_t tre_add_tag_right(tre_mem_t mem,
+ tre_ast_node_t* node,
+ int tag_id) {
+ tre_catenation_t* c;
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
@@ -1151,61 +1117,54 @@ typedef enum {
ADDTAGS_SET_SUBMATCH_END
} tre_addtags_symbol_t;
-
typedef struct {
int tag;
int next_tag;
} tre_tag_states_t;
-
/* Go through `regset' and set submatch data for submatches that are
using this tag. */
-static void
-tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
-{
+static void tre_purge_regset(int* regset, tre_tnfa_t* tnfa, int tag) {
int i;
- for (i = 0; regset[i] >= 0; i++)
- {
- int id = regset[i] / 2;
- int start = !(regset[i] % 2);
- if (start)
- tnfa->submatch_data[id].so_tag = tag;
- else
- tnfa->submatch_data[id].eo_tag = tag;
- }
+ for (i = 0; regset[i] >= 0; i++) {
+ int id = regset[i] / 2;
+ int start = !(regset[i] % 2);
+ if (start)
+ tnfa->submatch_data[id].so_tag = tag;
+ else
+ tnfa->submatch_data[id].eo_tag = tag;
+ }
regset[0] = -1;
}
-
/* Adds tags to appropriate locations in the parse tree in `tree', so that
subexpressions marked for submatch addressing can be traced. */
-static reg_errcode_t
-tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
- tre_tnfa_t *tnfa)
-{
+static reg_errcode_t tre_add_tags(tre_mem_t mem,
+ tre_stack_t* stack,
+ tre_ast_node_t* tree,
+ tre_tnfa_t* tnfa) {
reg_errcode_t status = REG_OK;
tre_addtags_symbol_t symbol;
- tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
+ tre_ast_node_t* node = tree; /* Tree node we are currently looking at. */
int bottom = tre_stack_num_objects(stack);
/* True for first pass (counting number of needed tags) */
int first_pass = (mem == NULL || tnfa == NULL);
int *regset, *orig_regset;
- int num_tags = 0; /* Total number of tags. */
- int num_minimals = 0; /* Number of special minimal tags. */
- int tag = 0; /* The tag that is to be added next. */
- int next_tag = 1; /* Next tag to use after this one. */
- int *parents; /* Stack of submatches the current submatch is
- contained in. */
+ int num_tags = 0; /* Total number of tags. */
+ int num_minimals = 0; /* Number of special minimal tags. */
+ int tag = 0; /* The tag that is to be added next. */
+ int next_tag = 1; /* Next tag to use after this one. */
+ int* parents; /* Stack of submatches the current submatch is
+ contained in. */
int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
- tre_tag_states_t *saved_states;
+ tre_tag_states_t* saved_states;
tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
- if (!first_pass)
- {
- tnfa->end_tag = 0;
- tnfa->minimal_tags[0] = -1;
- }
+ if (!first_pass) {
+ tnfa->end_tag = 0;
+ tnfa->minimal_tags[0] = -1;
+ }
regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
if (regset == NULL)
@@ -1214,430 +1173,383 @@ tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
orig_regset = regset;
parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
- if (parents == NULL)
- {
- xfree(regset);
- return REG_ESPACE;
- }
+ if (parents == NULL) {
+ xfree(regset);
+ return REG_ESPACE;
+ }
parents[0] = -1;
saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
- if (saved_states == NULL)
- {
- xfree(regset);
- xfree(parents);
- return REG_ESPACE;
- }
- else
- {
- unsigned int i;
- for (i = 0; i <= tnfa->num_submatches; i++)
- saved_states[i].tag = -1;
- }
+ if (saved_states == NULL) {
+ xfree(regset);
+ xfree(parents);
+ return REG_ESPACE;
+ } else {
+ unsigned int i;
+ for (i = 0; i <= tnfa->num_submatches; i++)
+ saved_states[i].tag = -1;
+ }
STACK_PUSH(stack, voidptr, node);
STACK_PUSH(stack, int, ADDTAGS_RECURSE);
- while (tre_stack_num_objects(stack) > bottom)
- {
- if (status != REG_OK)
- break;
-
- symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
- switch (symbol)
- {
-
- case ADDTAGS_SET_SUBMATCH_END:
- {
- int id = tre_stack_pop_int(stack);
- int i;
-
- /* Add end of this submatch to regset. */
- for (i = 0; regset[i] >= 0; i++);
- regset[i] = id * 2 + 1;
- regset[i + 1] = -1;
-
- /* Pop this submatch from the parents stack. */
- for (i = 0; parents[i] >= 0; i++);
- parents[i - 1] = -1;
- break;
- }
-
- case ADDTAGS_RECURSE:
- node = tre_stack_pop_voidptr(stack);
-
- if (node->submatch_id >= 0)
- {
- int id = node->submatch_id;
- int i;
-
-
- /* Add start of this submatch to regset. */
- for (i = 0; regset[i] >= 0; i++);
- regset[i] = id * 2;
- regset[i + 1] = -1;
-
- if (!first_pass)
- {
- for (i = 0; parents[i] >= 0; i++);
- tnfa->submatch_data[id].parents = NULL;
- if (i > 0)
- {
- int *p = xmalloc(sizeof(*p) * (i + 1));
- if (p == NULL)
- {
- status = REG_ESPACE;
- break;
- }
- assert(tnfa->submatch_data[id].parents == NULL);
- tnfa->submatch_data[id].parents = p;
- for (i = 0; parents[i] >= 0; i++)
- p[i] = parents[i];
- p[i] = -1;
- }
- }
-
- /* Add end of this submatch to regset after processing this
- node. */
- STACK_PUSHX(stack, int, node->submatch_id);
- STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
- }
-
- switch (node->type)
- {
- case LITERAL:
- {
- tre_literal_t *lit = node->obj;
-
- if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
- {
- int i;
- if (regset[0] >= 0)
- {
- /* Regset is not empty, so add a tag before the
- literal or backref. */
- if (!first_pass)
- {
- status = tre_add_tag_left(mem, node, tag);
- tnfa->tag_directions[tag] = direction;
- if (minimal_tag >= 0)
- {
- for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
- tnfa->minimal_tags[i] = tag;
- tnfa->minimal_tags[i + 1] = minimal_tag;
- tnfa->minimal_tags[i + 2] = -1;
- minimal_tag = -1;
- num_minimals++;
- }
- tre_purge_regset(regset, tnfa, tag);
- }
- else
- {
- node->num_tags = 1;
- }
-
- regset[0] = -1;
- tag = next_tag;
- num_tags++;
- next_tag++;
- }
- }
- else
- {
- assert(!IS_TAG(lit));
- }
- break;
- }
- case CATENATION:
- {
- tre_catenation_t *cat = node->obj;
- tre_ast_node_t *left = cat->left;
- tre_ast_node_t *right = cat->right;
- int reserved_tag = -1;
-
-
- /* After processing right child. */
- STACK_PUSHX(stack, voidptr, node);
- STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
-
- /* Process right child. */
- STACK_PUSHX(stack, voidptr, right);
- STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
-
- /* After processing left child. */
- STACK_PUSHX(stack, int, next_tag + left->num_tags);
- if (left->num_tags > 0 && right->num_tags > 0)
- {
- /* Reserve the next tag to the right child. */
- reserved_tag = next_tag;
- next_tag++;
- }
- STACK_PUSHX(stack, int, reserved_tag);
- STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
-
- /* Process left child. */
- STACK_PUSHX(stack, voidptr, left);
- STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
-
- }
- break;
- case ITERATION:
- {
- tre_iteration_t *iter = node->obj;
-
- if (first_pass)
- {
- STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
- }
- else
- {
- STACK_PUSHX(stack, int, tag);
- STACK_PUSHX(stack, int, iter->minimal);
- }
- STACK_PUSHX(stack, voidptr, node);
- STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
-
- STACK_PUSHX(stack, voidptr, iter->arg);
- STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+ while (tre_stack_num_objects(stack) > bottom) {
+ if (status != REG_OK)
+ break;
- /* Regset is not empty, so add a tag here. */
- if (regset[0] >= 0 || iter->minimal)
- {
- if (!first_pass)
- {
- int i;
- status = tre_add_tag_left(mem, node, tag);
- if (iter->minimal)
- tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
- else
- tnfa->tag_directions[tag] = direction;
- if (minimal_tag >= 0)
- {
- for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
- tnfa->minimal_tags[i] = tag;
- tnfa->minimal_tags[i + 1] = minimal_tag;
- tnfa->minimal_tags[i + 2] = -1;
- minimal_tag = -1;
- num_minimals++;
- }
- tre_purge_regset(regset, tnfa, tag);
- }
-
- regset[0] = -1;
- tag = next_tag;
- num_tags++;
- next_tag++;
- }
- direction = TRE_TAG_MINIMIZE;
- }
- break;
- case UNION:
- {
- tre_union_t *uni = node->obj;
- tre_ast_node_t *left = uni->left;
- tre_ast_node_t *right = uni->right;
- int left_tag;
- int right_tag;
-
- if (regset[0] >= 0)
- {
- left_tag = next_tag;
- right_tag = next_tag + 1;
- }
- else
- {
- left_tag = tag;
- right_tag = next_tag;
- }
+ symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
+ switch (symbol) {
+ case ADDTAGS_SET_SUBMATCH_END: {
+ int id = tre_stack_pop_int(stack);
+ int i;
+
+ /* Add end of this submatch to regset. */
+ for (i = 0; regset[i] >= 0; i++)
+ ;
+ regset[i] = id * 2 + 1;
+ regset[i + 1] = -1;
+
+ /* Pop this submatch from the parents stack. */
+ for (i = 0; parents[i] >= 0; i++)
+ ;
+ parents[i - 1] = -1;
+ break;
+ }
- /* After processing right child. */
- STACK_PUSHX(stack, int, right_tag);
- STACK_PUSHX(stack, int, left_tag);
- STACK_PUSHX(stack, voidptr, regset);
- STACK_PUSHX(stack, int, regset[0] >= 0);
- STACK_PUSHX(stack, voidptr, node);
- STACK_PUSHX(stack, voidptr, right);
- STACK_PUSHX(stack, voidptr, left);
- STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
-
- /* Process right child. */
- STACK_PUSHX(stack, voidptr, right);
- STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
-
- /* After processing left child. */
- STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
-
- /* Process left child. */
- STACK_PUSHX(stack, voidptr, left);
- STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
-
- /* Regset is not empty, so add a tag here. */
- if (regset[0] >= 0)
- {
- if (!first_pass)
- {
- int i;
- status = tre_add_tag_left(mem, node, tag);
- tnfa->tag_directions[tag] = direction;
- if (minimal_tag >= 0)
- {
- for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
- tnfa->minimal_tags[i] = tag;
- tnfa->minimal_tags[i + 1] = minimal_tag;
- tnfa->minimal_tags[i + 2] = -1;
- minimal_tag = -1;
- num_minimals++;
- }
- tre_purge_regset(regset, tnfa, tag);
- }
-
- regset[0] = -1;
- tag = next_tag;
- num_tags++;
- next_tag++;
- }
+ case ADDTAGS_RECURSE:
+ node = tre_stack_pop_voidptr(stack);
+
+ if (node->submatch_id >= 0) {
+ int id = node->submatch_id;
+ int i;
+
+ /* Add start of this submatch to regset. */
+ for (i = 0; regset[i] >= 0; i++)
+ ;
+ regset[i] = id * 2;
+ regset[i + 1] = -1;
+
+ if (!first_pass) {
+ for (i = 0; parents[i] >= 0; i++)
+ ;
+ tnfa->submatch_data[id].parents = NULL;
+ if (i > 0) {
+ int* p = xmalloc(sizeof(*p) * (i + 1));
+ if (p == NULL) {
+ status = REG_ESPACE;
+ break;
+ }
+ assert(tnfa->submatch_data[id].parents == NULL);
+ tnfa->submatch_data[id].parents = p;
+ for (i = 0; parents[i] >= 0; i++)
+ p[i] = parents[i];
+ p[i] = -1;
+ }
+ }
+
+ /* Add end of this submatch to regset after processing this
+ node. */
+ STACK_PUSHX(stack, int, node->submatch_id);
+ STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
+ }
+
+ switch (node->type) {
+ case LITERAL: {
+ tre_literal_t* lit = node->obj;
+
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) {
+ int i;
+ if (regset[0] >= 0) {
+ /* Regset is not empty, so add a tag before the
+ literal or backref. */
+ if (!first_pass) {
+ status = tre_add_tag_left(mem, node, tag);
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0) {
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
+ ;
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ } else {
+ node->num_tags = 1;
+ }
+
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+ } else {
+ assert(!IS_TAG(lit));
+ }
+ break;
+ }
+ case CATENATION: {
+ tre_catenation_t* cat = node->obj;
+ tre_ast_node_t* left = cat->left;
+ tre_ast_node_t* right = cat->right;
+ int reserved_tag = -1;
+
+ /* After processing right child. */
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
+
+ /* Process right child. */
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* After processing left child. */
+ STACK_PUSHX(stack, int, next_tag + left->num_tags);
+ if (left->num_tags > 0 && right->num_tags > 0) {
+ /* Reserve the next tag to the right child. */
+ reserved_tag = next_tag;
+ next_tag++;
+ }
+ STACK_PUSHX(stack, int, reserved_tag);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
+
+ /* Process left child. */
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ } break;
+ case ITERATION: {
+ tre_iteration_t* iter = node->obj;
+
+ if (first_pass) {
+ STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
+ } else {
+ STACK_PUSHX(stack, int, tag);
+ STACK_PUSHX(stack, int, iter->minimal);
+ }
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
+
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* Regset is not empty, so add a tag here. */
+ if (regset[0] >= 0 || iter->minimal) {
+ if (!first_pass) {
+ int i;
+ status = tre_add_tag_left(mem, node, tag);
+ if (iter->minimal)
+ tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
+ else
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0) {
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
+ ;
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+ direction = TRE_TAG_MINIMIZE;
+ } break;
+ case UNION: {
+ tre_union_t* uni = node->obj;
+ tre_ast_node_t* left = uni->left;
+ tre_ast_node_t* right = uni->right;
+ int left_tag;
+ int right_tag;
+
+ if (regset[0] >= 0) {
+ left_tag = next_tag;
+ right_tag = next_tag + 1;
+ } else {
+ left_tag = tag;
+ right_tag = next_tag;
+ }
+
+ /* After processing right child. */
+ STACK_PUSHX(stack, int, right_tag);
+ STACK_PUSHX(stack, int, left_tag);
+ STACK_PUSHX(stack, voidptr, regset);
+ STACK_PUSHX(stack, int, regset[0] >= 0);
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
+
+ /* Process right child. */
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* After processing left child. */
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
+
+ /* Process left child. */
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* Regset is not empty, so add a tag here. */
+ if (regset[0] >= 0) {
+ if (!first_pass) {
+ int i;
+ status = tre_add_tag_left(mem, node, tag);
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0) {
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
+ ;
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+
+ if (node->num_submatches > 0) {
+ /* The next two tags are reserved for markers. */
+ next_tag++;
+ tag = next_tag;
+ next_tag++;
+ }
+
+ break;
+ }
+ }
+
+ if (node->submatch_id >= 0) {
+ int i;
+ /* Push this submatch on the parents stack. */
+ for (i = 0; parents[i] >= 0; i++)
+ ;
+ parents[i] = node->submatch_id;
+ parents[i + 1] = -1;
+ }
+
+ break; /* end case: ADDTAGS_RECURSE */
+
+ case ADDTAGS_AFTER_ITERATION: {
+ int minimal = 0;
+ int enter_tag;
+ node = tre_stack_pop_voidptr(stack);
+ if (first_pass) {
+ node->num_tags = ((tre_iteration_t*)node->obj)->arg->num_tags +
+ tre_stack_pop_int(stack);
+ minimal_tag = -1;
+ } else {
+ minimal = tre_stack_pop_int(stack);
+ enter_tag = tre_stack_pop_int(stack);
+ if (minimal)
+ minimal_tag = enter_tag;
+ }
+
+ if (!first_pass) {
+ if (minimal)
+ direction = TRE_TAG_MINIMIZE;
+ else
+ direction = TRE_TAG_MAXIMIZE;
+ }
+ break;
+ }
- if (node->num_submatches > 0)
- {
- /* The next two tags are reserved for markers. */
- next_tag++;
- tag = next_tag;
- next_tag++;
- }
+ case ADDTAGS_AFTER_CAT_LEFT: {
+ int new_tag = tre_stack_pop_int(stack);
+ next_tag = tre_stack_pop_int(stack);
+ if (new_tag >= 0) {
+ tag = new_tag;
+ }
+ break;
+ }
- break;
- }
- }
-
- if (node->submatch_id >= 0)
- {
- int i;
- /* Push this submatch on the parents stack. */
- for (i = 0; parents[i] >= 0; i++);
- parents[i] = node->submatch_id;
- parents[i + 1] = -1;
- }
-
- break; /* end case: ADDTAGS_RECURSE */
-
- case ADDTAGS_AFTER_ITERATION:
- {
- int minimal = 0;
- int enter_tag;
- node = tre_stack_pop_voidptr(stack);
- if (first_pass)
- {
- node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
- + tre_stack_pop_int(stack);
- minimal_tag = -1;
- }
- else
- {
- minimal = tre_stack_pop_int(stack);
- enter_tag = tre_stack_pop_int(stack);
- if (minimal)
- minimal_tag = enter_tag;
- }
-
- if (!first_pass)
- {
- if (minimal)
- direction = TRE_TAG_MINIMIZE;
- else
- direction = TRE_TAG_MAXIMIZE;
- }
- break;
- }
-
- case ADDTAGS_AFTER_CAT_LEFT:
- {
- int new_tag = tre_stack_pop_int(stack);
- next_tag = tre_stack_pop_int(stack);
- if (new_tag >= 0)
- {
- tag = new_tag;
- }
- break;
- }
-
- case ADDTAGS_AFTER_CAT_RIGHT:
- node = tre_stack_pop_voidptr(stack);
- if (first_pass)
- node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
- + ((tre_catenation_t *)node->obj)->right->num_tags;
- break;
-
- case ADDTAGS_AFTER_UNION_LEFT:
- /* Lift the bottom of the `regset' array so that when processing
- the right operand the items currently in the array are
- invisible. The original bottom was saved at ADDTAGS_UNION and
- will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
- while (*regset >= 0)
- regset++;
- break;
-
- case ADDTAGS_AFTER_UNION_RIGHT:
- {
- int added_tags, tag_left, tag_right;
- tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
- tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
- node = tre_stack_pop_voidptr(stack);
- added_tags = tre_stack_pop_int(stack);
- if (first_pass)
- {
- node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
- + ((tre_union_t *)node->obj)->right->num_tags + added_tags
- + ((node->num_submatches > 0) ? 2 : 0);
- }
- regset = tre_stack_pop_voidptr(stack);
- tag_left = tre_stack_pop_int(stack);
- tag_right = tre_stack_pop_int(stack);
-
- /* Add tags after both children, the left child gets a smaller
- tag than the right child. This guarantees that we prefer
- the left child over the right child. */
- /* XXX - This is not always necessary (if the children have
- tags which must be seen for every match of that child). */
- /* XXX - Check if this is the only place where tre_add_tag_right
- is used. If so, use tre_add_tag_left (putting the tag before
- the child as opposed after the child) and throw away
- tre_add_tag_right. */
- if (node->num_submatches > 0)
- {
- if (!first_pass)
- {
- status = tre_add_tag_right(mem, left, tag_left);
- tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
- if (status == REG_OK)
- status = tre_add_tag_right(mem, right, tag_right);
- tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
- }
- num_tags += 2;
- }
- direction = TRE_TAG_MAXIMIZE;
- break;
- }
+ case ADDTAGS_AFTER_CAT_RIGHT:
+ node = tre_stack_pop_voidptr(stack);
+ if (first_pass)
+ node->num_tags = ((tre_catenation_t*)node->obj)->left->num_tags +
+ ((tre_catenation_t*)node->obj)->right->num_tags;
+ break;
+
+ case ADDTAGS_AFTER_UNION_LEFT:
+ /* Lift the bottom of the `regset' array so that when processing
+ the right operand the items currently in the array are
+ invisible. The original bottom was saved at ADDTAGS_UNION and
+ will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
+ while (*regset >= 0)
+ regset++;
+ break;
+
+ case ADDTAGS_AFTER_UNION_RIGHT: {
+ int added_tags, tag_left, tag_right;
+ tre_ast_node_t* left = tre_stack_pop_voidptr(stack);
+ tre_ast_node_t* right = tre_stack_pop_voidptr(stack);
+ node = tre_stack_pop_voidptr(stack);
+ added_tags = tre_stack_pop_int(stack);
+ if (first_pass) {
+ node->num_tags = ((tre_union_t*)node->obj)->left->num_tags +
+ ((tre_union_t*)node->obj)->right->num_tags +
+ added_tags + ((node->num_submatches > 0) ? 2 : 0);
+ }
+ regset = tre_stack_pop_voidptr(stack);
+ tag_left = tre_stack_pop_int(stack);
+ tag_right = tre_stack_pop_int(stack);
+
+ /* Add tags after both children, the left child gets a smaller
+ tag than the right child. This guarantees that we prefer
+ the left child over the right child. */
+ /* XXX - This is not always necessary (if the children have
+ tags which must be seen for every match of that child). */
+ /* XXX - Check if this is the only place where tre_add_tag_right
+ is used. If so, use tre_add_tag_left (putting the tag before
+ the child as opposed after the child) and throw away
+ tre_add_tag_right. */
+ if (node->num_submatches > 0) {
+ if (!first_pass) {
+ status = tre_add_tag_right(mem, left, tag_left);
+ tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
+ if (status == REG_OK)
+ status = tre_add_tag_right(mem, right, tag_right);
+ tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
+ }
+ num_tags += 2;
+ }
+ direction = TRE_TAG_MAXIMIZE;
+ break;
+ }
- default:
- assert(0);
- break;
+ default:
+ assert(0);
+ break;
- } /* end switch(symbol) */
- } /* end while(tre_stack_num_objects(stack) > bottom) */
+ } /* end switch(symbol) */
+ } /* end while(tre_stack_num_objects(stack) > bottom) */
if (!first_pass)
tre_purge_regset(regset, tnfa, tag);
- if (!first_pass && minimal_tag >= 0)
- {
- int i;
- for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
- tnfa->minimal_tags[i] = tag;
- tnfa->minimal_tags[i + 1] = minimal_tag;
- tnfa->minimal_tags[i + 2] = -1;
- minimal_tag = -1;
- num_minimals++;
- }
+ if (!first_pass && minimal_tag >= 0) {
+ int i;
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
+ ;
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
assert(tree->num_tags == num_tags);
tnfa->end_tag = num_tags;
@@ -1649,173 +1561,154 @@ tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
return status;
}
-
-
/*
AST to TNFA compilation routines.
*/
-typedef enum {
- COPY_RECURSE,
- COPY_SET_RESULT_PTR
-} tre_copyast_symbol_t;
+typedef enum { COPY_RECURSE, COPY_SET_RESULT_PTR } tre_copyast_symbol_t;
/* Flags for tre_copy_ast(). */
-#define COPY_REMOVE_TAGS 1
-#define COPY_MAXIMIZE_FIRST_TAG 2
-
-static reg_errcode_t
-tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
- int flags, int *pos_add, tre_tag_direction_t *tag_directions,
- tre_ast_node_t **copy, int *max_pos)
-{
+#define COPY_REMOVE_TAGS 1
+#define COPY_MAXIMIZE_FIRST_TAG 2
+
+static reg_errcode_t tre_copy_ast(tre_mem_t mem,
+ tre_stack_t* stack,
+ tre_ast_node_t* ast,
+ int flags,
+ int* pos_add,
+ tre_tag_direction_t* tag_directions,
+ tre_ast_node_t** copy,
+ int* max_pos) {
reg_errcode_t status = REG_OK;
int bottom = tre_stack_num_objects(stack);
int num_copied = 0;
int first_tag = 1;
- tre_ast_node_t **result = copy;
+ tre_ast_node_t** result = copy;
tre_copyast_symbol_t symbol;
STACK_PUSH(stack, voidptr, ast);
STACK_PUSH(stack, int, COPY_RECURSE);
- while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
- {
- tre_ast_node_t *node;
- if (status != REG_OK)
- break;
-
- symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
- switch (symbol)
- {
- case COPY_SET_RESULT_PTR:
- result = tre_stack_pop_voidptr(stack);
- break;
- case COPY_RECURSE:
- node = tre_stack_pop_voidptr(stack);
- switch (node->type)
- {
- case LITERAL:
- {
- tre_literal_t *lit = node->obj;
- int pos = lit->position;
- int min = lit->code_min;
- int max = lit->code_max;
- if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
- {
- /* XXX - e.g. [ab] has only one position but two
- nodes, so we are creating holes in the state space
- here. Not fatal, just wastes memory. */
- pos += *pos_add;
- num_copied++;
- }
- else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
- {
- /* Change this tag to empty. */
- min = EMPTY;
- max = pos = -1;
- }
- else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
- && first_tag)
- {
- /* Maximize the first tag. */
- tag_directions[max] = TRE_TAG_MAXIMIZE;
- first_tag = 0;
- }
- *result = tre_ast_new_literal(mem, min, max, pos);
- if (*result == NULL)
- status = REG_ESPACE;
- else {
- tre_literal_t *p = (*result)->obj;
- p->class = lit->class;
- p->neg_classes = lit->neg_classes;
- }
-
- if (pos > *max_pos)
- *max_pos = pos;
- break;
- }
- case UNION:
- {
- tre_union_t *uni = node->obj;
- tre_union_t *tmp;
- *result = tre_ast_new_union(mem, uni->left, uni->right);
- if (*result == NULL)
- {
- status = REG_ESPACE;
- break;
- }
- tmp = (*result)->obj;
- result = &tmp->left;
- STACK_PUSHX(stack, voidptr, uni->right);
- STACK_PUSHX(stack, int, COPY_RECURSE);
- STACK_PUSHX(stack, voidptr, &tmp->right);
- STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
- STACK_PUSHX(stack, voidptr, uni->left);
- STACK_PUSHX(stack, int, COPY_RECURSE);
- break;
- }
- case CATENATION:
- {
- tre_catenation_t *cat = node->obj;
- tre_catenation_t *tmp;
- *result = tre_ast_new_catenation(mem, cat->left, cat->right);
- if (*result == NULL)
- {
- status = REG_ESPACE;
- break;
- }
- tmp = (*result)->obj;
- tmp->left = NULL;
- tmp->right = NULL;
- result = &tmp->left;
-
- STACK_PUSHX(stack, voidptr, cat->right);
- STACK_PUSHX(stack, int, COPY_RECURSE);
- STACK_PUSHX(stack, voidptr, &tmp->right);
- STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
- STACK_PUSHX(stack, voidptr, cat->left);
- STACK_PUSHX(stack, int, COPY_RECURSE);
- break;
- }
- case ITERATION:
- {
- tre_iteration_t *iter = node->obj;
- STACK_PUSHX(stack, voidptr, iter->arg);
- STACK_PUSHX(stack, int, COPY_RECURSE);
- *result = tre_ast_new_iter(mem, iter->arg, iter->min,
- iter->max, iter->minimal);
- if (*result == NULL)
- {
- status = REG_ESPACE;
- break;
- }
- iter = (*result)->obj;
- result = &iter->arg;
- break;
- }
- default:
- assert(0);
- break;
- }
- break;
- }
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {
+ tre_ast_node_t* node;
+ if (status != REG_OK)
+ break;
+
+ symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
+ switch (symbol) {
+ case COPY_SET_RESULT_PTR:
+ result = tre_stack_pop_voidptr(stack);
+ break;
+ case COPY_RECURSE:
+ node = tre_stack_pop_voidptr(stack);
+ switch (node->type) {
+ case LITERAL: {
+ tre_literal_t* lit = node->obj;
+ int pos = lit->position;
+ int min = lit->code_min;
+ int max = lit->code_max;
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) {
+ /* XXX - e.g. [ab] has only one position but two
+ nodes, so we are creating holes in the state space
+ here. Not fatal, just wastes memory. */
+ pos += *pos_add;
+ num_copied++;
+ } else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) {
+ /* Change this tag to empty. */
+ min = EMPTY;
+ max = pos = -1;
+ } else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) &&
+ first_tag) {
+ /* Maximize the first tag. */
+ tag_directions[max] = TRE_TAG_MAXIMIZE;
+ first_tag = 0;
+ }
+ *result = tre_ast_new_literal(mem, min, max, pos);
+ if (*result == NULL)
+ status = REG_ESPACE;
+ else {
+ tre_literal_t* p = (*result)->obj;
+ p->class = lit->class;
+ p->neg_classes = lit->neg_classes;
+ }
+
+ if (pos > *max_pos)
+ *max_pos = pos;
+ break;
+ }
+ case UNION: {
+ tre_union_t* uni = node->obj;
+ tre_union_t* tmp;
+ *result = tre_ast_new_union(mem, uni->left, uni->right);
+ if (*result == NULL) {
+ status = REG_ESPACE;
+ break;
+ }
+ tmp = (*result)->obj;
+ result = &tmp->left;
+ STACK_PUSHX(stack, voidptr, uni->right);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ STACK_PUSHX(stack, voidptr, &tmp->right);
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+ STACK_PUSHX(stack, voidptr, uni->left);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ break;
+ }
+ case CATENATION: {
+ tre_catenation_t* cat = node->obj;
+ tre_catenation_t* tmp;
+ *result = tre_ast_new_catenation(mem, cat->left, cat->right);
+ if (*result == NULL) {
+ status = REG_ESPACE;
+ break;
+ }
+ tmp = (*result)->obj;
+ tmp->left = NULL;
+ tmp->right = NULL;
+ result = &tmp->left;
+
+ STACK_PUSHX(stack, voidptr, cat->right);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ STACK_PUSHX(stack, voidptr, &tmp->right);
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ break;
+ }
+ case ITERATION: {
+ tre_iteration_t* iter = node->obj;
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ *result = tre_ast_new_iter(mem, iter->arg, iter->min, iter->max,
+ iter->minimal);
+ if (*result == NULL) {
+ status = REG_ESPACE;
+ break;
+ }
+ iter = (*result)->obj;
+ result = &iter->arg;
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ break;
}
+ }
*pos_add += num_copied;
return status;
}
-typedef enum {
- EXPAND_RECURSE,
- EXPAND_AFTER_ITER
-} tre_expand_ast_symbol_t;
+typedef enum { EXPAND_RECURSE, EXPAND_AFTER_ITER } tre_expand_ast_symbol_t;
/* Expands each iteration node that has a finite nonzero minimum or maximum
iteration count to a catenated sequence of copies of the node. */
-static reg_errcode_t
-tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
- int *position, tre_tag_direction_t *tag_directions)
-{
+static reg_errcode_t tre_expand_ast(tre_mem_t mem,
+ tre_stack_t* stack,
+ tre_ast_node_t* ast,
+ int* position,
+ tre_tag_direction_t* tag_directions) {
reg_errcode_t status = REG_OK;
int bottom = tre_stack_num_objects(stack);
int pos_add = 0;
@@ -1825,165 +1718,148 @@ tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
STACK_PUSHR(stack, voidptr, ast);
STACK_PUSHR(stack, int, EXPAND_RECURSE);
- while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
- {
- tre_ast_node_t *node;
- tre_expand_ast_symbol_t symbol;
-
- if (status != REG_OK)
- break;
-
- symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
- node = tre_stack_pop_voidptr(stack);
- switch (symbol)
- {
- case EXPAND_RECURSE:
- switch (node->type)
- {
- case LITERAL:
- {
- tre_literal_t *lit= node->obj;
- if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
- {
- lit->position += pos_add;
- if (lit->position > max_pos)
- max_pos = lit->position;
- }
- break;
- }
- case UNION:
- {
- tre_union_t *uni = node->obj;
- STACK_PUSHX(stack, voidptr, uni->right);
- STACK_PUSHX(stack, int, EXPAND_RECURSE);
- STACK_PUSHX(stack, voidptr, uni->left);
- STACK_PUSHX(stack, int, EXPAND_RECURSE);
- break;
- }
- case CATENATION:
- {
- tre_catenation_t *cat = node->obj;
- STACK_PUSHX(stack, voidptr, cat->right);
- STACK_PUSHX(stack, int, EXPAND_RECURSE);
- STACK_PUSHX(stack, voidptr, cat->left);
- STACK_PUSHX(stack, int, EXPAND_RECURSE);
- break;
- }
- case ITERATION:
- {
- tre_iteration_t *iter = node->obj;
- STACK_PUSHX(stack, int, pos_add);
- STACK_PUSHX(stack, voidptr, node);
- STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
- STACK_PUSHX(stack, voidptr, iter->arg);
- STACK_PUSHX(stack, int, EXPAND_RECURSE);
- /* If we are going to expand this node at EXPAND_AFTER_ITER
- then don't increase the `pos' fields of the nodes now, it
- will get done when expanding. */
- if (iter->min > 1 || iter->max > 1)
- pos_add = 0;
- iter_depth++;
- break;
- }
- default:
- assert(0);
- break;
- }
- break;
- case EXPAND_AFTER_ITER:
- {
- tre_iteration_t *iter = node->obj;
- int pos_add_last;
- pos_add = tre_stack_pop_int(stack);
- pos_add_last = pos_add;
- if (iter->min > 1 || iter->max > 1)
- {
- tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
- int j;
- int pos_add_save = pos_add;
-
- /* Create a catenated sequence of copies of the node. */
- for (j = 0; j < iter->min; j++)
- {
- tre_ast_node_t *copy;
- /* Remove tags from all but the last copy. */
- int flags = ((j + 1 < iter->min)
- ? COPY_REMOVE_TAGS
- : COPY_MAXIMIZE_FIRST_TAG);
- pos_add_save = pos_add;
- status = tre_copy_ast(mem, stack, iter->arg, flags,
- &pos_add, tag_directions, &copy,
- &max_pos);
- if (status != REG_OK)
- return status;
- if (seq1 != NULL)
- seq1 = tre_ast_new_catenation(mem, seq1, copy);
- else
- seq1 = copy;
- if (seq1 == NULL)
- return REG_ESPACE;
- }
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {
+ tre_ast_node_t* node;
+ tre_expand_ast_symbol_t symbol;
- if (iter->max == -1)
- {
- /* No upper limit. */
- pos_add_save = pos_add;
- status = tre_copy_ast(mem, stack, iter->arg, 0,
- &pos_add, NULL, &seq2, &max_pos);
- if (status != REG_OK)
- return status;
- seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
- if (seq2 == NULL)
- return REG_ESPACE;
- }
- else
- {
- for (j = iter->min; j < iter->max; j++)
- {
- tre_ast_node_t *tmp, *copy;
- pos_add_save = pos_add;
- status = tre_copy_ast(mem, stack, iter->arg, 0,
- &pos_add, NULL, &copy, &max_pos);
- if (status != REG_OK)
- return status;
- if (seq2 != NULL)
- seq2 = tre_ast_new_catenation(mem, copy, seq2);
- else
- seq2 = copy;
- if (seq2 == NULL)
- return REG_ESPACE;
- tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
- if (tmp == NULL)
- return REG_ESPACE;
- seq2 = tre_ast_new_union(mem, tmp, seq2);
- if (seq2 == NULL)
- return REG_ESPACE;
- }
- }
+ if (status != REG_OK)
+ break;
- pos_add = pos_add_save;
- if (seq1 == NULL)
- seq1 = seq2;
- else if (seq2 != NULL)
- seq1 = tre_ast_new_catenation(mem, seq1, seq2);
- if (seq1 == NULL)
- return REG_ESPACE;
- node->obj = seq1->obj;
- node->type = seq1->type;
- }
-
- iter_depth--;
- pos_add_total += pos_add - pos_add_last;
- if (iter_depth == 0)
- pos_add = pos_add_total;
-
- break;
- }
- default:
- assert(0);
- break;
- }
+ symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
+ node = tre_stack_pop_voidptr(stack);
+ switch (symbol) {
+ case EXPAND_RECURSE:
+ switch (node->type) {
+ case LITERAL: {
+ tre_literal_t* lit = node->obj;
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) {
+ lit->position += pos_add;
+ if (lit->position > max_pos)
+ max_pos = lit->position;
+ }
+ break;
+ }
+ case UNION: {
+ tre_union_t* uni = node->obj;
+ STACK_PUSHX(stack, voidptr, uni->right);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ STACK_PUSHX(stack, voidptr, uni->left);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ break;
+ }
+ case CATENATION: {
+ tre_catenation_t* cat = node->obj;
+ STACK_PUSHX(stack, voidptr, cat->right);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ break;
+ }
+ case ITERATION: {
+ tre_iteration_t* iter = node->obj;
+ STACK_PUSHX(stack, int, pos_add);
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ /* If we are going to expand this node at EXPAND_AFTER_ITER
+ then don't increase the `pos' fields of the nodes now, it
+ will get done when expanding. */
+ if (iter->min > 1 || iter->max > 1)
+ pos_add = 0;
+ iter_depth++;
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case EXPAND_AFTER_ITER: {
+ tre_iteration_t* iter = node->obj;
+ int pos_add_last;
+ pos_add = tre_stack_pop_int(stack);
+ pos_add_last = pos_add;
+ if (iter->min > 1 || iter->max > 1) {
+ tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
+ int j;
+ int pos_add_save = pos_add;
+
+ /* Create a catenated sequence of copies of the node. */
+ for (j = 0; j < iter->min; j++) {
+ tre_ast_node_t* copy;
+ /* Remove tags from all but the last copy. */
+ int flags = ((j + 1 < iter->min) ? COPY_REMOVE_TAGS
+ : COPY_MAXIMIZE_FIRST_TAG);
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, flags, &pos_add,
+ tag_directions, &copy, &max_pos);
+ if (status != REG_OK)
+ return status;
+ if (seq1 != NULL)
+ seq1 = tre_ast_new_catenation(mem, seq1, copy);
+ else
+ seq1 = copy;
+ if (seq1 == NULL)
+ return REG_ESPACE;
+ }
+
+ if (iter->max == -1) {
+ /* No upper limit. */
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL,
+ &seq2, &max_pos);
+ if (status != REG_OK)
+ return status;
+ seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ } else {
+ for (j = iter->min; j < iter->max; j++) {
+ tre_ast_node_t *tmp, *copy;
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL,
+ &copy, &max_pos);
+ if (status != REG_OK)
+ return status;
+ if (seq2 != NULL)
+ seq2 = tre_ast_new_catenation(mem, copy, seq2);
+ else
+ seq2 = copy;
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
+ if (tmp == NULL)
+ return REG_ESPACE;
+ seq2 = tre_ast_new_union(mem, tmp, seq2);
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ }
+ }
+
+ pos_add = pos_add_save;
+ if (seq1 == NULL)
+ seq1 = seq2;
+ else if (seq2 != NULL)
+ seq1 = tre_ast_new_catenation(mem, seq1, seq2);
+ if (seq1 == NULL)
+ return REG_ESPACE;
+ node->obj = seq1->obj;
+ node->type = seq1->type;
+ }
+
+ iter_depth--;
+ pos_add_total += pos_add - pos_add_last;
+ if (iter_depth == 0)
+ pos_add = pos_add_total;
+
+ break;
+ }
+ default:
+ assert(0);
+ break;
}
+ }
*position += pos_add_total;
@@ -1997,10 +1873,8 @@ tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
return status;
}
-static tre_pos_and_tags_t *
-tre_set_empty(tre_mem_t mem)
-{
- tre_pos_and_tags_t *new_set;
+static tre_pos_and_tags_t* tre_set_empty(tre_mem_t mem) {
+ tre_pos_and_tags_t* new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set));
if (new_set == NULL)
@@ -2013,11 +1887,14 @@ tre_set_empty(tre_mem_t mem)
return new_set;
}
-static tre_pos_and_tags_t *
-tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
- tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
-{
- tre_pos_and_tags_t *new_set;
+static tre_pos_and_tags_t* tre_set_one(tre_mem_t mem,
+ int position,
+ int code_min,
+ int code_max,
+ tre_ctype_t class,
+ tre_ctype_t* neg_classes,
+ int backref) {
+ tre_pos_and_tags_t* new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
if (new_set == NULL)
@@ -2036,73 +1913,74 @@ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
return new_set;
}
-static tre_pos_and_tags_t *
-tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
- int *tags, int assertions)
-{
+static tre_pos_and_tags_t* tre_set_union(tre_mem_t mem,
+ tre_pos_and_tags_t* set1,
+ tre_pos_and_tags_t* set2,
+ int* tags,
+ int assertions) {
int s1, s2, i, j;
- tre_pos_and_tags_t *new_set;
- int *new_tags;
+ tre_pos_and_tags_t* new_set;
+ int* new_tags;
int num_tags;
- for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
- for (s1 = 0; set1[s1].position >= 0; s1++);
- for (s2 = 0; set2[s2].position >= 0; s2++);
+ for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++)
+ ;
+ for (s1 = 0; set1[s1].position >= 0; s1++)
+ ;
+ for (s2 = 0; set2[s2].position >= 0; s2++)
+ ;
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
- if (!new_set )
+ if (!new_set)
return NULL;
- for (s1 = 0; set1[s1].position >= 0; s1++)
- {
- new_set[s1].position = set1[s1].position;
- new_set[s1].code_min = set1[s1].code_min;
- new_set[s1].code_max = set1[s1].code_max;
- new_set[s1].assertions = set1[s1].assertions | assertions;
- new_set[s1].class = set1[s1].class;
- new_set[s1].neg_classes = set1[s1].neg_classes;
- new_set[s1].backref = set1[s1].backref;
- if (set1[s1].tags == NULL && tags == NULL)
- new_set[s1].tags = NULL;
- else
- {
- for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
- new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
- * (i + num_tags + 1)));
- if (new_tags == NULL)
- return NULL;
- for (j = 0; j < i; j++)
- new_tags[j] = set1[s1].tags[j];
- for (i = 0; i < num_tags; i++)
- new_tags[j + i] = tags[i];
- new_tags[j + i] = -1;
- new_set[s1].tags = new_tags;
- }
+ for (s1 = 0; set1[s1].position >= 0; s1++) {
+ new_set[s1].position = set1[s1].position;
+ new_set[s1].code_min = set1[s1].code_min;
+ new_set[s1].code_max = set1[s1].code_max;
+ new_set[s1].assertions = set1[s1].assertions | assertions;
+ new_set[s1].class = set1[s1].class;
+ new_set[s1].neg_classes = set1[s1].neg_classes;
+ new_set[s1].backref = set1[s1].backref;
+ if (set1[s1].tags == NULL && tags == NULL)
+ new_set[s1].tags = NULL;
+ else {
+ for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++)
+ ;
+ new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) * (i + num_tags + 1)));
+ if (new_tags == NULL)
+ return NULL;
+ for (j = 0; j < i; j++)
+ new_tags[j] = set1[s1].tags[j];
+ for (i = 0; i < num_tags; i++)
+ new_tags[j + i] = tags[i];
+ new_tags[j + i] = -1;
+ new_set[s1].tags = new_tags;
}
+ }
- for (s2 = 0; set2[s2].position >= 0; s2++)
- {
- new_set[s1 + s2].position = set2[s2].position;
- new_set[s1 + s2].code_min = set2[s2].code_min;
- new_set[s1 + s2].code_max = set2[s2].code_max;
- /* XXX - why not | assertions here as well? */
- new_set[s1 + s2].assertions = set2[s2].assertions;
- new_set[s1 + s2].class = set2[s2].class;
- new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
- new_set[s1 + s2].backref = set2[s2].backref;
- if (set2[s2].tags == NULL)
- new_set[s1 + s2].tags = NULL;
- else
- {
- for (i = 0; set2[s2].tags[i] >= 0; i++);
- new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
- if (new_tags == NULL)
- return NULL;
- for (j = 0; j < i; j++)
- new_tags[j] = set2[s2].tags[j];
- new_tags[j] = -1;
- new_set[s1 + s2].tags = new_tags;
- }
+ for (s2 = 0; set2[s2].position >= 0; s2++) {
+ new_set[s1 + s2].position = set2[s2].position;
+ new_set[s1 + s2].code_min = set2[s2].code_min;
+ new_set[s1 + s2].code_max = set2[s2].code_max;
+ /* XXX - why not | assertions here as well? */
+ new_set[s1 + s2].assertions = set2[s2].assertions;
+ new_set[s1 + s2].class = set2[s2].class;
+ new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
+ new_set[s1 + s2].backref = set2[s2].backref;
+ if (set2[s2].tags == NULL)
+ new_set[s1 + s2].tags = NULL;
+ else {
+ for (i = 0; set2[s2].tags[i] >= 0; i++)
+ ;
+ new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
+ if (new_tags == NULL)
+ return NULL;
+ for (j = 0; j < i; j++)
+ new_tags[j] = set2[s2].tags[j];
+ new_tags[j] = -1;
+ new_set[s1 + s2].tags = new_tags;
}
+ }
new_set[s1 + s2].position = -1;
return new_set;
}
@@ -2111,14 +1989,15 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
taken according to POSIX.2 rules, and adds the tags on that path to
`tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
set to the number of tags seen on the path. */
-static reg_errcode_t
-tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
- int *assertions, int *num_tags_seen)
-{
- tre_literal_t *lit;
- tre_union_t *uni;
- tre_catenation_t *cat;
- tre_iteration_t *iter;
+static reg_errcode_t tre_match_empty(tre_stack_t* stack,
+ tre_ast_node_t* node,
+ int* tags,
+ int* assertions,
+ int* num_tags_seen) {
+ tre_literal_t* lit;
+ tre_union_t* uni;
+ tre_catenation_t* cat;
+ tre_iteration_t* iter;
int i;
int bottom = tre_stack_num_objects(stack);
reg_errcode_t status = REG_OK;
@@ -2128,89 +2007,81 @@ tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
status = tre_stack_push_voidptr(stack, node);
/* Walk through the tree recursively. */
- while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
- {
- node = tre_stack_pop_voidptr(stack);
-
- switch (node->type)
- {
- case LITERAL:
- lit = (tre_literal_t *)node->obj;
- switch (lit->code_min)
- {
- case TAG:
- if (lit->code_max >= 0)
- {
- if (tags != NULL)
- {
- /* Add the tag to `tags'. */
- for (i = 0; tags[i] >= 0; i++)
- if (tags[i] == lit->code_max)
- break;
- if (tags[i] < 0)
- {
- tags[i] = lit->code_max;
- tags[i + 1] = -1;
- }
- }
- if (num_tags_seen)
- (*num_tags_seen)++;
- }
- break;
- case ASSERTION:
- assert(lit->code_max >= 1
- || lit->code_max <= ASSERT_LAST);
- if (assertions != NULL)
- *assertions |= lit->code_max;
- break;
- case EMPTY:
- break;
- default:
- assert(0);
- break;
- }
- break;
-
- case UNION:
- /* Subexpressions starting earlier take priority over ones
- starting later, so we prefer the left subexpression over the
- right subexpression. */
- uni = (tre_union_t *)node->obj;
- if (uni->left->nullable)
- STACK_PUSHX(stack, voidptr, uni->left)
- else if (uni->right->nullable)
- STACK_PUSHX(stack, voidptr, uni->right)
- else
- assert(0);
- break;
-
- case CATENATION:
- /* The path must go through both children. */
- cat = (tre_catenation_t *)node->obj;
- assert(cat->left->nullable);
- assert(cat->right->nullable);
- STACK_PUSHX(stack, voidptr, cat->left);
- STACK_PUSHX(stack, voidptr, cat->right);
- break;
-
- case ITERATION:
- /* A match with an empty string is preferred over no match at
- all, so we go through the argument if possible. */
- iter = (tre_iteration_t *)node->obj;
- if (iter->arg->nullable)
- STACK_PUSHX(stack, voidptr, iter->arg);
- break;
-
- default:
- assert(0);
- break;
- }
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {
+ node = tre_stack_pop_voidptr(stack);
+
+ switch (node->type) {
+ case LITERAL:
+ lit = (tre_literal_t*)node->obj;
+ switch (lit->code_min) {
+ case TAG:
+ if (lit->code_max >= 0) {
+ if (tags != NULL) {
+ /* Add the tag to `tags'. */
+ for (i = 0; tags[i] >= 0; i++)
+ if (tags[i] == lit->code_max)
+ break;
+ if (tags[i] < 0) {
+ tags[i] = lit->code_max;
+ tags[i + 1] = -1;
+ }
+ }
+ if (num_tags_seen)
+ (*num_tags_seen)++;
+ }
+ break;
+ case ASSERTION:
+ assert(lit->code_max >= 1 || lit->code_max <= ASSERT_LAST);
+ if (assertions != NULL)
+ *assertions |= lit->code_max;
+ break;
+ case EMPTY:
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+
+ case UNION:
+ /* Subexpressions starting earlier take priority over ones
+ starting later, so we prefer the left subexpression over the
+ right subexpression. */
+ uni = (tre_union_t*)node->obj;
+ if (uni->left->nullable)
+ STACK_PUSHX(stack, voidptr, uni->left)
+ else if (uni->right->nullable)
+ STACK_PUSHX(stack, voidptr, uni->right)
+ else
+ assert(0);
+ break;
+
+ case CATENATION:
+ /* The path must go through both children. */
+ cat = (tre_catenation_t*)node->obj;
+ assert(cat->left->nullable);
+ assert(cat->right->nullable);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, voidptr, cat->right);
+ break;
+
+ case ITERATION:
+ /* A match with an empty string is preferred over no match at
+ all, so we go through the argument if possible. */
+ iter = (tre_iteration_t*)node->obj;
+ if (iter->arg->nullable)
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ break;
+
+ default:
+ assert(0);
+ break;
}
+ }
return status;
}
-
typedef enum {
NFL_RECURSE,
NFL_POST_UNION,
@@ -2218,263 +2089,228 @@ typedef enum {
NFL_POST_ITERATION
} tre_nfl_stack_symbol_t;
-
/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
the nodes of the AST `tree'. */
-static reg_errcode_t
-tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
-{
+static reg_errcode_t tre_compute_nfl(tre_mem_t mem,
+ tre_stack_t* stack,
+ tre_ast_node_t* tree) {
int bottom = tre_stack_num_objects(stack);
STACK_PUSHR(stack, voidptr, tree);
STACK_PUSHR(stack, int, NFL_RECURSE);
- while (tre_stack_num_objects(stack) > bottom)
- {
- tre_nfl_stack_symbol_t symbol;
- tre_ast_node_t *node;
-
- symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
- node = tre_stack_pop_voidptr(stack);
- switch (symbol)
- {
- case NFL_RECURSE:
- switch (node->type)
- {
- case LITERAL:
- {
- tre_literal_t *lit = (tre_literal_t *)node->obj;
- if (IS_BACKREF(lit))
- {
- /* Back references: nullable = false, firstpos = {i},
- lastpos = {i}. */
- node->nullable = 0;
- node->firstpos = tre_set_one(mem, lit->position, 0,
- TRE_CHAR_MAX, 0, NULL, -1);
- if (!node->firstpos)
- return REG_ESPACE;
- node->lastpos = tre_set_one(mem, lit->position, 0,
- TRE_CHAR_MAX, 0, NULL,
- (int)lit->code_max);
- if (!node->lastpos)
- return REG_ESPACE;
- }
- else if (lit->code_min < 0)
- {
- /* Tags, empty strings, params, and zero width assertions:
- nullable = true, firstpos = {}, and lastpos = {}. */
- node->nullable = 1;
- node->firstpos = tre_set_empty(mem);
- if (!node->firstpos)
- return REG_ESPACE;
- node->lastpos = tre_set_empty(mem);
- if (!node->lastpos)
- return REG_ESPACE;
- }
- else
- {
- /* Literal at position i: nullable = false, firstpos = {i},
- lastpos = {i}. */
- node->nullable = 0;
- node->firstpos =
- tre_set_one(mem, lit->position, (int)lit->code_min,
- (int)lit->code_max, 0, NULL, -1);
- if (!node->firstpos)
- return REG_ESPACE;
- node->lastpos = tre_set_one(mem, lit->position,
- (int)lit->code_min,
- (int)lit->code_max,
- lit->class, lit->neg_classes,
- -1);
- if (!node->lastpos)
- return REG_ESPACE;
- }
- break;
- }
-
- case UNION:
- /* Compute the attributes for the two subtrees, and after that
- for this node. */
- STACK_PUSHR(stack, voidptr, node);
- STACK_PUSHR(stack, int, NFL_POST_UNION);
- STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
- STACK_PUSHR(stack, int, NFL_RECURSE);
- STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
- STACK_PUSHR(stack, int, NFL_RECURSE);
- break;
-
- case CATENATION:
- /* Compute the attributes for the two subtrees, and after that
- for this node. */
- STACK_PUSHR(stack, voidptr, node);
- STACK_PUSHR(stack, int, NFL_POST_CATENATION);
- STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
- STACK_PUSHR(stack, int, NFL_RECURSE);
- STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
- STACK_PUSHR(stack, int, NFL_RECURSE);
- break;
-
- case ITERATION:
- /* Compute the attributes for the subtree, and after that for
- this node. */
- STACK_PUSHR(stack, voidptr, node);
- STACK_PUSHR(stack, int, NFL_POST_ITERATION);
- STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
- STACK_PUSHR(stack, int, NFL_RECURSE);
- break;
- }
- break; /* end case: NFL_RECURSE */
-
- case NFL_POST_UNION:
- {
- tre_union_t *uni = (tre_union_t *)node->obj;
- node->nullable = uni->left->nullable || uni->right->nullable;
- node->firstpos = tre_set_union(mem, uni->left->firstpos,
- uni->right->firstpos, NULL, 0);
- if (!node->firstpos)
- return REG_ESPACE;
- node->lastpos = tre_set_union(mem, uni->left->lastpos,
- uni->right->lastpos, NULL, 0);
- if (!node->lastpos)
- return REG_ESPACE;
- break;
- }
-
- case NFL_POST_ITERATION:
- {
- tre_iteration_t *iter = (tre_iteration_t *)node->obj;
-
- if (iter->min == 0 || iter->arg->nullable)
- node->nullable = 1;
- else
- node->nullable = 0;
- node->firstpos = iter->arg->firstpos;
- node->lastpos = iter->arg->lastpos;
- break;
- }
-
- case NFL_POST_CATENATION:
- {
- int num_tags, *tags, assertions;
- reg_errcode_t status;
- tre_catenation_t *cat = node->obj;
- node->nullable = cat->left->nullable && cat->right->nullable;
-
- /* Compute firstpos. */
- if (cat->left->nullable)
- {
- /* The left side matches the empty string. Make a first pass
- with tre_match_empty() to get the number of tags and
- parameters. */
- status = tre_match_empty(stack, cat->left,
- NULL, NULL, &num_tags);
- if (status != REG_OK)
- return status;
- /* Allocate arrays for the tags and parameters. */
- tags = xmalloc(sizeof(*tags) * (num_tags + 1));
- if (!tags)
- return REG_ESPACE;
- tags[0] = -1;
- assertions = 0;
- /* Second pass with tre_mach_empty() to get the list of
- tags and parameters. */
- status = tre_match_empty(stack, cat->left, tags,
- &assertions, NULL);
- if (status != REG_OK)
- {
- xfree(tags);
- return status;
- }
- node->firstpos =
- tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
- tags, assertions);
- xfree(tags);
- if (!node->firstpos)
- return REG_ESPACE;
- }
- else
- {
- node->firstpos = cat->left->firstpos;
- }
-
- /* Compute lastpos. */
- if (cat->right->nullable)
- {
- /* The right side matches the empty string. Make a first pass
- with tre_match_empty() to get the number of tags and
- parameters. */
- status = tre_match_empty(stack, cat->right,
- NULL, NULL, &num_tags);
- if (status != REG_OK)
- return status;
- /* Allocate arrays for the tags and parameters. */
- tags = xmalloc(sizeof(int) * (num_tags + 1));
- if (!tags)
- return REG_ESPACE;
- tags[0] = -1;
- assertions = 0;
- /* Second pass with tre_mach_empty() to get the list of
- tags and parameters. */
- status = tre_match_empty(stack, cat->right, tags,
- &assertions, NULL);
- if (status != REG_OK)
- {
- xfree(tags);
- return status;
- }
- node->lastpos =
- tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
- tags, assertions);
- xfree(tags);
- if (!node->lastpos)
- return REG_ESPACE;
- }
- else
- {
- node->lastpos = cat->right->lastpos;
- }
- break;
- }
-
- default:
- assert(0);
- break;
- }
+ while (tre_stack_num_objects(stack) > bottom) {
+ tre_nfl_stack_symbol_t symbol;
+ tre_ast_node_t* node;
+
+ symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
+ node = tre_stack_pop_voidptr(stack);
+ switch (symbol) {
+ case NFL_RECURSE:
+ switch (node->type) {
+ case LITERAL: {
+ tre_literal_t* lit = (tre_literal_t*)node->obj;
+ if (IS_BACKREF(lit)) {
+ /* Back references: nullable = false, firstpos = {i},
+ lastpos = {i}. */
+ node->nullable = 0;
+ node->firstpos =
+ tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX,
+ 0, NULL, (int)lit->code_max);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ } else if (lit->code_min < 0) {
+ /* Tags, empty strings, params, and zero width assertions:
+ nullable = true, firstpos = {}, and lastpos = {}. */
+ node->nullable = 1;
+ node->firstpos = tre_set_empty(mem);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_empty(mem);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ } else {
+ /* Literal at position i: nullable = false, firstpos = {i},
+ lastpos = {i}. */
+ node->nullable = 0;
+ node->firstpos =
+ tre_set_one(mem, lit->position, (int)lit->code_min,
+ (int)lit->code_max, 0, NULL, -1);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_one(
+ mem, lit->position, (int)lit->code_min, (int)lit->code_max,
+ lit->class, lit->neg_classes, -1);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ break;
+ }
+
+ case UNION:
+ /* Compute the attributes for the two subtrees, and after that
+ for this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_UNION);
+ STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->right);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->left);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+
+ case CATENATION:
+ /* Compute the attributes for the two subtrees, and after that
+ for this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_CATENATION);
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->right);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->left);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+
+ case ITERATION:
+ /* Compute the attributes for the subtree, and after that for
+ this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_ITERATION);
+ STACK_PUSHR(stack, voidptr, ((tre_iteration_t*)node->obj)->arg);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+ }
+ break; /* end case: NFL_RECURSE */
+
+ case NFL_POST_UNION: {
+ tre_union_t* uni = (tre_union_t*)node->obj;
+ node->nullable = uni->left->nullable || uni->right->nullable;
+ node->firstpos = tre_set_union(mem, uni->left->firstpos,
+ uni->right->firstpos, NULL, 0);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_union(mem, uni->left->lastpos,
+ uni->right->lastpos, NULL, 0);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ break;
+ }
+
+ case NFL_POST_ITERATION: {
+ tre_iteration_t* iter = (tre_iteration_t*)node->obj;
+
+ if (iter->min == 0 || iter->arg->nullable)
+ node->nullable = 1;
+ else
+ node->nullable = 0;
+ node->firstpos = iter->arg->firstpos;
+ node->lastpos = iter->arg->lastpos;
+ break;
+ }
+
+ case NFL_POST_CATENATION: {
+ int num_tags, *tags, assertions;
+ reg_errcode_t status;
+ tre_catenation_t* cat = node->obj;
+ node->nullable = cat->left->nullable && cat->right->nullable;
+
+ /* Compute firstpos. */
+ if (cat->left->nullable) {
+ /* The left side matches the empty string. Make a first pass
+ with tre_match_empty() to get the number of tags and
+ parameters. */
+ status = tre_match_empty(stack, cat->left, NULL, NULL, &num_tags);
+ if (status != REG_OK)
+ return status;
+ /* Allocate arrays for the tags and parameters. */
+ tags = xmalloc(sizeof(*tags) * (num_tags + 1));
+ if (!tags)
+ return REG_ESPACE;
+ tags[0] = -1;
+ assertions = 0;
+ /* Second pass with tre_mach_empty() to get the list of
+ tags and parameters. */
+ status = tre_match_empty(stack, cat->left, tags, &assertions, NULL);
+ if (status != REG_OK) {
+ xfree(tags);
+ return status;
+ }
+ node->firstpos = tre_set_union(mem, cat->right->firstpos,
+ cat->left->firstpos, tags, assertions);
+ xfree(tags);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ } else {
+ node->firstpos = cat->left->firstpos;
+ }
+
+ /* Compute lastpos. */
+ if (cat->right->nullable) {
+ /* The right side matches the empty string. Make a first pass
+ with tre_match_empty() to get the number of tags and
+ parameters. */
+ status = tre_match_empty(stack, cat->right, NULL, NULL, &num_tags);
+ if (status != REG_OK)
+ return status;
+ /* Allocate arrays for the tags and parameters. */
+ tags = xmalloc(sizeof(int) * (num_tags + 1));
+ if (!tags)
+ return REG_ESPACE;
+ tags[0] = -1;
+ assertions = 0;
+ /* Second pass with tre_mach_empty() to get the list of
+ tags and parameters. */
+ status = tre_match_empty(stack, cat->right, tags, &assertions, NULL);
+ if (status != REG_OK) {
+ xfree(tags);
+ return status;
+ }
+ node->lastpos = tre_set_union(mem, cat->left->lastpos,
+ cat->right->lastpos, tags, assertions);
+ xfree(tags);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ } else {
+ node->lastpos = cat->right->lastpos;
+ }
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
}
+ }
return REG_OK;
}
-
/* Adds a transition from each position in `p1' to each position in `p2'. */
-static reg_errcode_t
-tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
- tre_tnfa_transition_t *transitions,
- int *counts, int *offs)
-{
- tre_pos_and_tags_t *orig_p2 = p2;
- tre_tnfa_transition_t *trans;
+static reg_errcode_t tre_make_trans(tre_pos_and_tags_t* p1,
+ tre_pos_and_tags_t* p2,
+ tre_tnfa_transition_t* transitions,
+ int* counts,
+ int* offs) {
+ tre_pos_and_tags_t* orig_p2 = p2;
+ tre_tnfa_transition_t* trans;
int i, j, k, l, dup, prev_p2_pos;
if (transitions != NULL)
- while (p1->position >= 0)
- {
- p2 = orig_p2;
- prev_p2_pos = -1;
- while (p2->position >= 0)
- {
- /* Optimization: if this position was already handled, skip it. */
- if (p2->position == prev_p2_pos)
- {
- p2++;
- continue;
- }
- prev_p2_pos = p2->position;
- /* Set `trans' to point to the next unused transition from
- position `p1->position'. */
- trans = transitions + offs[p1->position];
- while (trans->state != NULL)
- {
+ while (p1->position >= 0) {
+ p2 = orig_p2;
+ prev_p2_pos = -1;
+ while (p2->position >= 0) {
+ /* Optimization: if this position was already handled, skip it. */
+ if (p2->position == prev_p2_pos) {
+ p2++;
+ continue;
+ }
+ prev_p2_pos = p2->position;
+ /* Set `trans' to point to the next unused transition from
+ position `p1->position'. */
+ trans = transitions + offs[p1->position];
+ while (trans->state != NULL) {
#if 0
/* If we find a previous transition from `p1->position' to
`p2->position', it is overwritten. This can happen only
@@ -2492,108 +2328,99 @@ tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
break;
}
#endif
- trans++;
- }
-
- if (trans->state == NULL)
- (trans + 1)->state = NULL;
- /* Use the character ranges, assertions, etc. from `p1' for
- the transition from `p1' to `p2'. */
- trans->code_min = p1->code_min;
- trans->code_max = p1->code_max;
- trans->state = transitions + offs[p2->position];
- trans->state_id = p2->position;
- trans->assertions = p1->assertions | p2->assertions
- | (p1->class ? ASSERT_CHAR_CLASS : 0)
- | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
- if (p1->backref >= 0)
- {
- assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
- assert(p2->backref < 0);
- trans->u.backref = p1->backref;
- trans->assertions |= ASSERT_BACKREF;
- }
- else
- trans->u.class = p1->class;
- if (p1->neg_classes != NULL)
- {
- for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
- trans->neg_classes =
- xmalloc(sizeof(*trans->neg_classes) * (i + 1));
- if (trans->neg_classes == NULL)
- return REG_ESPACE;
- for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
- trans->neg_classes[i] = p1->neg_classes[i];
- trans->neg_classes[i] = (tre_ctype_t)0;
- }
- else
- trans->neg_classes = NULL;
-
- /* Find out how many tags this transition has. */
- i = 0;
- if (p1->tags != NULL)
- while(p1->tags[i] >= 0)
- i++;
- j = 0;
- if (p2->tags != NULL)
- while(p2->tags[j] >= 0)
- j++;
-
- /* If we are overwriting a transition, free the old tag array. */
- if (trans->tags != NULL)
- xfree(trans->tags);
- trans->tags = NULL;
-
- /* If there were any tags, allocate an array and fill it. */
- if (i + j > 0)
- {
- trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
- if (!trans->tags)
- return REG_ESPACE;
- i = 0;
- if (p1->tags != NULL)
- while(p1->tags[i] >= 0)
- {
- trans->tags[i] = p1->tags[i];
- i++;
- }
- l = i;
- j = 0;
- if (p2->tags != NULL)
- while (p2->tags[j] >= 0)
- {
- /* Don't add duplicates. */
- dup = 0;
- for (k = 0; k < i; k++)
- if (trans->tags[k] == p2->tags[j])
- {
- dup = 1;
- break;
- }
- if (!dup)
- trans->tags[l++] = p2->tags[j];
- j++;
- }
- trans->tags[l] = -1;
- }
-
- p2++;
- }
- p1++;
+ trans++;
+ }
+
+ if (trans->state == NULL)
+ (trans + 1)->state = NULL;
+ /* Use the character ranges, assertions, etc. from `p1' for
+ the transition from `p1' to `p2'. */
+ trans->code_min = p1->code_min;
+ trans->code_max = p1->code_max;
+ trans->state = transitions + offs[p2->position];
+ trans->state_id = p2->position;
+ trans->assertions =
+ p1->assertions | p2->assertions |
+ (p1->class ? ASSERT_CHAR_CLASS : 0) |
+ (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
+ if (p1->backref >= 0) {
+ assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
+ assert(p2->backref < 0);
+ trans->u.backref = p1->backref;
+ trans->assertions |= ASSERT_BACKREF;
+ } else
+ trans->u.class = p1->class;
+ if (p1->neg_classes != NULL) {
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
+ ;
+ trans->neg_classes = xmalloc(sizeof(*trans->neg_classes) * (i + 1));
+ if (trans->neg_classes == NULL)
+ return REG_ESPACE;
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
+ trans->neg_classes[i] = p1->neg_classes[i];
+ trans->neg_classes[i] = (tre_ctype_t)0;
+ } else
+ trans->neg_classes = NULL;
+
+ /* Find out how many tags this transition has. */
+ i = 0;
+ if (p1->tags != NULL)
+ while (p1->tags[i] >= 0)
+ i++;
+ j = 0;
+ if (p2->tags != NULL)
+ while (p2->tags[j] >= 0)
+ j++;
+
+ /* If we are overwriting a transition, free the old tag array. */
+ if (trans->tags != NULL)
+ xfree(trans->tags);
+ trans->tags = NULL;
+
+ /* If there were any tags, allocate an array and fill it. */
+ if (i + j > 0) {
+ trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
+ if (!trans->tags)
+ return REG_ESPACE;
+ i = 0;
+ if (p1->tags != NULL)
+ while (p1->tags[i] >= 0) {
+ trans->tags[i] = p1->tags[i];
+ i++;
+ }
+ l = i;
+ j = 0;
+ if (p2->tags != NULL)
+ while (p2->tags[j] >= 0) {
+ /* Don't add duplicates. */
+ dup = 0;
+ for (k = 0; k < i; k++)
+ if (trans->tags[k] == p2->tags[j]) {
+ dup = 1;
+ break;
+ }
+ if (!dup)
+ trans->tags[l++] = p2->tags[j];
+ j++;
+ }
+ trans->tags[l] = -1;
+ }
+
+ p2++;
}
+ p1++;
+ }
else
/* Compute a maximum limit for the number of transitions leaving
from each state. */
- while (p1->position >= 0)
- {
- p2 = orig_p2;
- while (p2->position >= 0)
- {
- counts[p1->position]++;
- p2++;
- }
- p1++;
+ while (p1->position >= 0) {
+ p2 = orig_p2;
+ while (p2->position >= 0) {
+ counts[p1->position]++;
+ p2++;
}
+ p1++;
+ }
return REG_OK;
}
@@ -2601,85 +2428,77 @@ tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
labelled with one character range (there are no transitions on empty
strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
the regexp. */
-static reg_errcode_t
-tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
- int *counts, int *offs)
-{
- tre_union_t *uni;
- tre_catenation_t *cat;
- tre_iteration_t *iter;
+static reg_errcode_t tre_ast_to_tnfa(tre_ast_node_t* node,
+ tre_tnfa_transition_t* transitions,
+ int* counts,
+ int* offs) {
+ tre_union_t* uni;
+ tre_catenation_t* cat;
+ tre_iteration_t* iter;
reg_errcode_t errcode = REG_OK;
/* XXX - recurse using a stack!. */
- switch (node->type)
- {
+ switch (node->type) {
case LITERAL:
break;
case UNION:
- uni = (tre_union_t *)node->obj;
+ uni = (tre_union_t*)node->obj;
errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
if (errcode != REG_OK)
- return errcode;
+ return errcode;
errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
break;
case CATENATION:
- cat = (tre_catenation_t *)node->obj;
+ cat = (tre_catenation_t*)node->obj;
/* Add a transition from each position in cat->left->lastpos
- to each position in cat->right->firstpos. */
+ to each position in cat->right->firstpos. */
errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
- transitions, counts, offs);
+ transitions, counts, offs);
if (errcode != REG_OK)
- return errcode;
+ return errcode;
errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
if (errcode != REG_OK)
- return errcode;
+ return errcode;
errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
break;
case ITERATION:
- iter = (tre_iteration_t *)node->obj;
+ iter = (tre_iteration_t*)node->obj;
assert(iter->max == -1 || iter->max == 1);
- if (iter->max == -1)
- {
- assert(iter->min == 0 || iter->min == 1);
- /* Add a transition from each last position in the iterated
- expression to each first position. */
- errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
- transitions, counts, offs);
- if (errcode != REG_OK)
- return errcode;
- }
+ if (iter->max == -1) {
+ assert(iter->min == 0 || iter->min == 1);
+ /* Add a transition from each last position in the iterated
+ expression to each first position. */
+ errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
+ transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ }
errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
break;
- }
+ }
return errcode;
}
+#define ERROR_EXIT(err) \
+ do { \
+ errcode = err; \
+ if (/*CONSTCOND*/ 1) \
+ goto error_exit; \
+ } while (/*CONSTCOND*/ 0)
-#define ERROR_EXIT(err) \
- do \
- { \
- errcode = err; \
- if (/*CONSTCOND*/1) \
- goto error_exit; \
- } \
- while (/*CONSTCOND*/0)
-
-
-int
-regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
-{
- tre_stack_t *stack;
+int regcomp(regex_t* restrict preg, const char* restrict regex, int cflags) {
+ tre_stack_t* stack;
tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
- tre_pos_and_tags_t *p;
+ tre_pos_and_tags_t* p;
int *counts = NULL, *offs = NULL;
int i, add = 0;
tre_tnfa_transition_t *transitions, *initial;
- tre_tnfa_t *tnfa = NULL;
- tre_submatch_data_t *submatch_data;
- tre_tag_direction_t *tag_directions = NULL;
+ tre_tnfa_t* tnfa = NULL;
+ tre_submatch_data_t* submatch_data;
+ tre_tag_direction_t* tag_directions = NULL;
reg_errcode_t errcode;
tre_mem_t mem;
@@ -2693,11 +2512,10 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
return REG_ESPACE;
/* Allocate a fast memory allocator. */
mem = tre_mem_new();
- if (!mem)
- {
- tre_stack_destroy(stack);
- return REG_ESPACE;
- }
+ if (!mem) {
+ tre_stack_destroy(stack);
+ return REG_ESPACE;
+ }
/* Parse the regexp. */
memset(&parse_ctx, 0, sizeof(parse_ctx));
@@ -2730,51 +2548,46 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
/* Set up tags for submatch addressing. If REG_NOSUB is set and the
regexp does not have back references, this can be skipped. */
- if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
- {
-
- /* Figure out how many tags we will need. */
- errcode = tre_add_tags(NULL, stack, tree, tnfa);
- if (errcode != REG_OK)
- ERROR_EXIT(errcode);
-
- if (tnfa->num_tags > 0)
- {
- tag_directions = xmalloc(sizeof(*tag_directions)
- * (tnfa->num_tags + 1));
- if (tag_directions == NULL)
- ERROR_EXIT(REG_ESPACE);
- tnfa->tag_directions = tag_directions;
- memset(tag_directions, -1,
- sizeof(*tag_directions) * (tnfa->num_tags + 1));
- }
- tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
- sizeof(*tnfa->minimal_tags));
- if (tnfa->minimal_tags == NULL)
- ERROR_EXIT(REG_ESPACE);
-
- submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
- sizeof(*submatch_data));
- if (submatch_data == NULL)
- ERROR_EXIT(REG_ESPACE);
- tnfa->submatch_data = submatch_data;
-
- errcode = tre_add_tags(mem, stack, tree, tnfa);
- if (errcode != REG_OK)
- ERROR_EXIT(errcode);
-
+ if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) {
+ /* Figure out how many tags we will need. */
+ errcode = tre_add_tags(NULL, stack, tree, tnfa);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ if (tnfa->num_tags > 0) {
+ tag_directions = xmalloc(sizeof(*tag_directions) * (tnfa->num_tags + 1));
+ if (tag_directions == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->tag_directions = tag_directions;
+ memset(tag_directions, -1,
+ sizeof(*tag_directions) * (tnfa->num_tags + 1));
}
+ tnfa->minimal_tags =
+ xcalloc((unsigned)tnfa->num_tags * 2 + 1, sizeof(*tnfa->minimal_tags));
+ if (tnfa->minimal_tags == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ submatch_data =
+ xcalloc((unsigned)parse_ctx.submatch_id, sizeof(*submatch_data));
+ if (submatch_data == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->submatch_data = submatch_data;
+
+ errcode = tre_add_tags(mem, stack, tree, tnfa);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+ }
/* Expand iteration nodes. */
- errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
- tag_directions);
+ errcode =
+ tre_expand_ast(mem, stack, tree, &parse_ctx.position, tag_directions);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
/* Add a dummy node for the final state.
XXX - For certain patterns this dummy node can be optimized away,
- for example "a*" or "ab*". Figure out a simple way to detect
- this possibility. */
+ for example "a*" or "ab*". Figure out a simple way to detect
+ this possibility. */
tmp_ast_l = tree;
tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
if (tmp_ast_r == NULL)
@@ -2801,12 +2614,11 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
tre_ast_to_tnfa(tree, NULL, counts, NULL);
add = 0;
- for (i = 0; i < parse_ctx.position; i++)
- {
- offs[i] = add;
- add += counts[i] + 1;
- counts[i] = 0;
- }
+ for (i = 0; i < parse_ctx.position; i++) {
+ offs[i] = add;
+ add += counts[i] + 1;
+ counts[i] = 0;
+ }
transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
if (transitions == NULL)
ERROR_EXIT(REG_ESPACE);
@@ -2821,11 +2633,10 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
p = tree->firstpos;
i = 0;
- while (p->position >= 0)
- {
- i++;
- p++;
- }
+ while (p->position >= 0) {
+ i++;
+ p++;
+ }
initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
if (initial == NULL)
@@ -2833,25 +2644,24 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
tnfa->initial = initial;
i = 0;
- for (p = tree->firstpos; p->position >= 0; p++)
- {
- initial[i].state = transitions + offs[p->position];
- initial[i].state_id = p->position;
- initial[i].tags = NULL;
- /* Copy the arrays p->tags, and p->params, they are allocated
- from a tre_mem object. */
- if (p->tags)
- {
- int j;
- for (j = 0; p->tags[j] >= 0; j++);
- initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
- if (!initial[i].tags)
- ERROR_EXIT(REG_ESPACE);
- memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
- }
- initial[i].assertions = p->assertions;
- i++;
+ for (p = tree->firstpos; p->position >= 0; p++) {
+ initial[i].state = transitions + offs[p->position];
+ initial[i].state_id = p->position;
+ initial[i].tags = NULL;
+ /* Copy the arrays p->tags, and p->params, they are allocated
+ from a tre_mem object. */
+ if (p->tags) {
+ int j;
+ for (j = 0; p->tags[j] >= 0; j++)
+ ;
+ initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
+ if (!initial[i].tags)
+ ERROR_EXIT(REG_ESPACE);
+ memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
}
+ initial[i].assertions = p->assertions;
+ i++;
+ }
initial[i].state = NULL;
tnfa->num_transitions = add;
@@ -2864,10 +2674,10 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
xfree(counts);
xfree(offs);
- preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+ preg->TRE_REGEX_T_FIELD = (void*)tnfa;
return REG_OK;
- error_exit:
+error_exit:
/* Free everything that was allocated and return the error code. */
tre_mem_destroy(mem);
if (stack != NULL)
@@ -2876,53 +2686,44 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
xfree(counts);
if (offs != NULL)
xfree(offs);
- preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+ preg->TRE_REGEX_T_FIELD = (void*)tnfa;
regfree(preg);
return errcode;
}
-
-
-
-void
-regfree(regex_t *preg)
-{
- tre_tnfa_t *tnfa;
+void regfree(regex_t* preg) {
+ tre_tnfa_t* tnfa;
unsigned int i;
- tre_tnfa_transition_t *trans;
+ tre_tnfa_transition_t* trans;
- tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ tnfa = (void*)preg->TRE_REGEX_T_FIELD;
if (!tnfa)
return;
for (i = 0; i < tnfa->num_transitions; i++)
- if (tnfa->transitions[i].state)
- {
- if (tnfa->transitions[i].tags)
- xfree(tnfa->transitions[i].tags);
- if (tnfa->transitions[i].neg_classes)
- xfree(tnfa->transitions[i].neg_classes);
- }
+ if (tnfa->transitions[i].state) {
+ if (tnfa->transitions[i].tags)
+ xfree(tnfa->transitions[i].tags);
+ if (tnfa->transitions[i].neg_classes)
+ xfree(tnfa->transitions[i].neg_classes);
+ }
if (tnfa->transitions)
xfree(tnfa->transitions);
- if (tnfa->initial)
- {
- for (trans = tnfa->initial; trans->state; trans++)
- {
- if (trans->tags)
- xfree(trans->tags);
- }
- xfree(tnfa->initial);
+ if (tnfa->initial) {
+ for (trans = tnfa->initial; trans->state; trans++) {
+ if (trans->tags)
+ xfree(trans->tags);
}
+ xfree(tnfa->initial);
+ }
- if (tnfa->submatch_data)
- {
- for (i = 0; i < tnfa->num_submatches; i++)
- if (tnfa->submatch_data[i].parents)
- xfree(tnfa->submatch_data[i].parents);
- xfree(tnfa->submatch_data);
- }
+ if (tnfa->submatch_data) {
+ for (i = 0; i < tnfa->num_submatches; i++)
+ if (tnfa->submatch_data[i].parents)
+ xfree(tnfa->submatch_data[i].parents);
+ xfree(tnfa->submatch_data);
+ }
if (tnfa->tag_directions)
xfree(tnfa->tag_directions);

Powered by Google App Engine
This is Rietveld 408576698