fusl/src/regex/regcomp.c - Issue 1714623002: [fusl] clang-format fusl

Side by Side Diff: fusl/src/regex/regcomp.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: headers too Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 regcomp.c - TRE POSIX compatible regex compilation functions.	2 regcomp.c - TRE POSIX compatible regex compilation functions.

3	3

4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>	4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>

5 All rights reserved.	5 All rights reserved.

6	6

7 Redistribution and use in source and binary forms, with or without	7 Redistribution and use in source and binary forms, with or without

8 modification, are permitted provided that the following conditions	8 modification, are permitted provided that the following conditions

9 are met:	9 are met:

10	10

(...skipping 30 matching lines...) Expand all Loading...
41 #include <assert.h>	41 #include <assert.h>

42	42

43 /***********************************************************************	43 /***********************************************************************

44 from tre-compile.h	44 from tre-compile.h

45 ***********************************************************************/	45 ***********************************************************************/

46	46

47 typedef struct {	47 typedef struct {

48 int position;	48 int position;

49 int code_min;	49 int code_min;

50 int code_max;	50 int code_max;

51 int *tags;	51 int* tags;

52 int assertions;	52 int assertions;

53 tre_ctype_t class;	53 tre_ctype_t class;

54 tre_ctype_t *neg_classes;	54 tre_ctype_t* neg_classes;

55 int backref;	55 int backref;

56 } tre_pos_and_tags_t;	56 } tre_pos_and_tags_t;

57	57

58

59 /***********************************************************************	58 /***********************************************************************

60 from tre-ast.c and tre-ast.h	59 from tre-ast.c and tre-ast.h

61 ***********************************************************************/	60 ***********************************************************************/

62	61

63 /* The different AST node types. */	62 /* The different AST node types. */

64 typedef enum {	63 typedef enum { LITERAL, CATENATION, ITERATION, UNION } tre_ast_type_t;

65 LITERAL,

66 CATENATION,

67 ITERATION,

68 UNION

69 } tre_ast_type_t;

70	64

71 /* Special subtypes of TRE_LITERAL. */	65 /* Special subtypes of TRE_LITERAL. */

72 #define EMPTY» -1 /* Empty leaf (denotes empty string). */	66 #define EMPTY -1 /* Empty leaf (denotes empty string). */

73 #define ASSERTION -2 /* Assertion leaf. */	67 #define ASSERTION -2 /* Assertion leaf. */

74 #define TAG» -3 /* Tag leaf. */	68 #define TAG -3 /* Tag leaf. */

75 #define BACKREF» -4 /* Back reference leaf. */	69 #define BACKREF -4 /* Back reference leaf. */

76	70

77 #define IS_SPECIAL(x)» ((x)->code_min < 0)	71 #define IS_SPECIAL(x) ((x)->code_min < 0)

78 #define IS_EMPTY(x)» ((x)->code_min == EMPTY)	72 #define IS_EMPTY(x) ((x)->code_min == EMPTY)

79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)	73 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)

80 #define IS_TAG(x)» ((x)->code_min == TAG)	74 #define IS_TAG(x) ((x)->code_min == TAG)

81 #define IS_BACKREF(x)» ((x)->code_min == BACKREF)	75 #define IS_BACKREF(x) ((x)->code_min == BACKREF)

82

83	76

84 /* A generic AST node. All AST nodes consist of this node on the top	77 /* A generic AST node. All AST nodes consist of this node on the top

85 level with `obj' pointing to the actual content. */	78 level with `obj' pointing to the actual content. */

86 typedef struct {	79 typedef struct {

87 tre_ast_type_t type; /* Type of the node. */	80 tre_ast_type_t type; /* Type of the node. */

88 void obj; / Pointer to actual node. */	81 void* obj; /* Pointer to actual node. */

89 int nullable;	82 int nullable;

90 int submatch_id;	83 int submatch_id;

91 int num_submatches;	84 int num_submatches;

92 int num_tags;	85 int num_tags;

93 tre_pos_and_tags_t *firstpos;	86 tre_pos_and_tags_t* firstpos;

94 tre_pos_and_tags_t *lastpos;	87 tre_pos_and_tags_t* lastpos;

95 } tre_ast_node_t;	88 } tre_ast_node_t;

96	89

97

98 /* A "literal" node. These are created for assertions, back references,	90 /* A "literal" node. These are created for assertions, back references,

99 tags, matching parameter settings, and all expressions that match one	91 tags, matching parameter settings, and all expressions that match one

100 character. */	92 character. */

101 typedef struct {	93 typedef struct {

102 long code_min;	94 long code_min;

103 long code_max;	95 long code_max;

104 int position;	96 int position;

105 tre_ctype_t class;	97 tre_ctype_t class;

106 tre_ctype_t *neg_classes;	98 tre_ctype_t* neg_classes;

107 } tre_literal_t;	99 } tre_literal_t;

108	100

109 /* A "catenation" node. These are created when two regexps are concatenated.	101 /* A "catenation" node. These are created when two regexps are concatenated.

110 If there are more than one subexpressions in sequence, the `left' part	102 If there are more than one subexpressions in sequence, the `left' part

111 holds all but the last, and `right' part holds the last subexpression	103 holds all but the last, and `right' part holds the last subexpression

112 (catenation is left associative). */	104 (catenation is left associative). */

113 typedef struct {	105 typedef struct {

114 tre_ast_node_t *left;	106 tre_ast_node_t* left;

115 tre_ast_node_t *right;	107 tre_ast_node_t* right;

116 } tre_catenation_t;	108 } tre_catenation_t;

117	109

118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"	110 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"

119 operators. */	111 operators. */

120 typedef struct {	112 typedef struct {

121 /* Subexpression to match. */	113 /* Subexpression to match. */

122 tre_ast_node_t *arg;	114 tre_ast_node_t* arg;

123 /* Minimum number of consecutive matches. */	115 /* Minimum number of consecutive matches. */

124 int min;	116 int min;

125 /* Maximum number of consecutive matches. */	117 /* Maximum number of consecutive matches. */

126 int max;	118 int max;

127 /* If 0, match as many characters as possible, if 1 match as few as	119 /* If 0, match as many characters as possible, if 1 match as few as

128 possible. Note that this does not always mean the same thing as	120 possible. Note that this does not always mean the same thing as

129 matching as many/few repetitions as possible. */	121 matching as many/few repetitions as possible. */

130 unsigned int minimal:1;	122 unsigned int minimal : 1;

131 } tre_iteration_t;	123 } tre_iteration_t;

132	124

133 /* An "union" node. These are created for the "\|" operator. */	125 /* An "union" node. These are created for the "\|" operator. */

134 typedef struct {	126 typedef struct {

135 tre_ast_node_t *left;	127 tre_ast_node_t* left;

136 tre_ast_node_t *right;	128 tre_ast_node_t* right;

137 } tre_union_t;	129 } tre_union_t;

138	130

139	131 static tre_ast_node_t* tre_ast_new_node(tre_mem_t mem, int type, void* obj) {

140 static tre_ast_node_t *	132 tre_ast_node_t* node = tre_mem_calloc(mem, sizeof *node);

141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)	133 if (!node \|\| !obj)

142 {	134 return 0;

143 » tre_ast_node_t node = tre_mem_calloc(mem, sizeof node);	135 node->obj = obj;

144 » if (!node \|\| !obj)	136 node->type = type;

145 » » return 0;	137 node->nullable = -1;

146 » node->obj = obj;	138 node->submatch_id = -1;

147 » node->type = type;	139 return node;

148 » node->nullable = -1;

149 » node->submatch_id = -1;

150 » return node;

151 }	140 }

152	141

153 static tre_ast_node_t *	142 static tre_ast_node_t* tre_ast_new_literal(tre_mem_t mem,

154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)	143 int code_min,

155 {	144 int code_max,

156 » tre_ast_node_t *node;	145 int position) {

157 » tre_literal_t *lit;	146 tre_ast_node_t* node;

	147 tre_literal_t* lit;

158	148

159 » lit = tre_mem_calloc(mem, sizeof *lit);	149 lit = tre_mem_calloc(mem, sizeof *lit);

160 » node = tre_ast_new_node(mem, LITERAL, lit);	150 node = tre_ast_new_node(mem, LITERAL, lit);

161 » if (!node)	151 if (!node)

162 » » return 0;	152 return 0;

163 » lit->code_min = code_min;	153 lit->code_min = code_min;

164 » lit->code_max = code_max;	154 lit->code_max = code_max;

165 » lit->position = position;	155 lit->position = position;

166 » return node;	156 return node;

167 }	157 }

168	158

169 static tre_ast_node_t *	159 static tre_ast_node_t* tre_ast_new_iter(tre_mem_t mem,

170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minim al)	160 tre_ast_node_t* arg,

171 {	161 int min,

172 » tre_ast_node_t *node;	162 int max,

173 » tre_iteration_t *iter;	163 int minimal) {

	164 tre_ast_node_t* node;

	165 tre_iteration_t* iter;

174	166

175 » iter = tre_mem_calloc(mem, sizeof *iter);	167 iter = tre_mem_calloc(mem, sizeof *iter);

176 » node = tre_ast_new_node(mem, ITERATION, iter);	168 node = tre_ast_new_node(mem, ITERATION, iter);

177 » if (!node)	169 if (!node)

178 » » return 0;	170 return 0;

179 » iter->arg = arg;	171 iter->arg = arg;

180 » iter->min = min;	172 iter->min = min;

181 » iter->max = max;	173 iter->max = max;

182 » iter->minimal = minimal;	174 iter->minimal = minimal;

183 » node->num_submatches = arg->num_submatches;	175 node->num_submatches = arg->num_submatches;

184 » return node;	176 return node;

185 }	177 }

186	178

187 static tre_ast_node_t *	179 static tre_ast_node_t* tre_ast_new_union(tre_mem_t mem,

188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t right)	180 tre_ast_node_t* left,

189 {	181 tre_ast_node_t* right) {

190 » tre_ast_node_t *node;	182 tre_ast_node_t* node;

191 » tre_union_t *un;	183 tre_union_t* un;

192	184

193 » if (!left)	185 if (!left)

194 » » return right;	186 return right;

195 » un = tre_mem_calloc(mem, sizeof *un);	187 un = tre_mem_calloc(mem, sizeof *un);

196 » node = tre_ast_new_node(mem, UNION, un);	188 node = tre_ast_new_node(mem, UNION, un);

197 » if (!node \|\| !right)	189 if (!node \|\| !right)

198 » » return 0;	190 return 0;

199 » un->left = left;	191 un->left = left;

200 » un->right = right;	192 un->right = right;

201 » node->num_submatches = left->num_submatches + right->num_submatches;	193 node->num_submatches = left->num_submatches + right->num_submatches;

202 » return node;	194 return node;

203 }	195 }

204	196

205 static tre_ast_node_t *	197 static tre_ast_node_t* tre_ast_new_catenation(tre_mem_t mem,

206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t righ t)	198 tre_ast_node_t* left,

207 {	199 tre_ast_node_t* right) {

208 » tre_ast_node_t *node;	200 tre_ast_node_t* node;

209 » tre_catenation_t *cat;	201 tre_catenation_t* cat;

210	202

211 » if (!left)	203 if (!left)

212 » » return right;	204 return right;

213 » cat = tre_mem_calloc(mem, sizeof *cat);	205 cat = tre_mem_calloc(mem, sizeof *cat);

214 » node = tre_ast_new_node(mem, CATENATION, cat);	206 node = tre_ast_new_node(mem, CATENATION, cat);

215 » if (!node)	207 if (!node)

216 » » return 0;	208 return 0;

217 » cat->left = left;	209 cat->left = left;

218 » cat->right = right;	210 cat->right = right;

219 » node->num_submatches = left->num_submatches + right->num_submatches;	211 node->num_submatches = left->num_submatches + right->num_submatches;

220 » return node;	212 return node;

221 }	213 }

222	214

223

224 /***********************************************************************	215 /***********************************************************************

225 from tre-stack.c and tre-stack.h	216 from tre-stack.c and tre-stack.h

226 ***********************************************************************/	217 ***********************************************************************/

227	218

228 typedef struct tre_stack_rec tre_stack_t;	219 typedef struct tre_stack_rec tre_stack_t;

229	220

230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'	221 /* Creates a new stack object. `size' is initial size in bytes, `max_size'

231 is maximum size, and `increment' specifies how much more space will be	222 is maximum size, and `increment' specifies how much more space will be

232 allocated with realloc() if all space gets used up. Returns the stack	223 allocated with realloc() if all space gets used up. Returns the stack

233 object or NULL if out of memory. */	224 object or NULL if out of memory. */

234 static tre_stack_t *	225 static tre_stack_t* tre_stack_new(int size, int max_size, int increment);

235 tre_stack_new(int size, int max_size, int increment);

236	226

237 /* Frees the stack object. */	227 /* Frees the stack object. */

238 static void	228 static void tre_stack_destroy(tre_stack_t* s);

239 tre_stack_destroy(tre_stack_t *s);

240	229

241 /* Returns the current number of objects in the stack. */	230 /* Returns the current number of objects in the stack. */

242 static int	231 static int tre_stack_num_objects(tre_stack_t* s);

243 tre_stack_num_objects(tre_stack_t *s);

244	232

245 /* Each tre_stack_push_(tre_stack_t s, <type> value) function pushes	233 /* Each tre_stack_push_(tre_stack_t s, <type> value) function pushes

246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.	234 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.

247 This tries to realloc() more space before failing if maximum size	235 This tries to realloc() more space before failing if maximum size

248 has not yet been reached. Returns REG_OK if successful. */	236 has not yet been reached. Returns REG_OK if successful. */

249 #define declare_pushf(typetag, type)» » » » » \	237 #define declare_pushf(typetag, type) \

250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)	238 static reg_errcode_t tre_stack_push_##typetag(tre_stack_t* s, type value)

251	239

252 declare_pushf(voidptr, void *);	240 declare_pushf(voidptr, void*);

253 declare_pushf(int, int);	241 declare_pushf(int, int);

254	242

255 /* Each tre_stack_pop_(tre_stack_t s) function pops the topmost	243 /* Each tre_stack_pop_(tre_stack_t s) function pops the topmost

256 element off of stack `s' and returns it. The stack must not be	244 element off of stack `s' and returns it. The stack must not be

257 empty. */	245 empty. */

258 #define declare_popf(typetag, type)» » \	246 #define declare_popf(typetag, type) \

259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)	247 static type tre_stack_pop_##typetag(tre_stack_t* s)

260	248

261 declare_popf(voidptr, void *);	249 declare_popf(voidptr, void*);

262 declare_popf(int, int);	250 declare_popf(int, int);

263	251

264 /* Just to save some typing. */	252 /* Just to save some typing. */

265 #define STACK_PUSH(s, typetag, value)» » » » » \	253 #define STACK_PUSH(s, typetag, value) \

266 do» » » » » » » » » \	254 do { \

267 {» » » » » » » » » \	255 status = tre_stack_push_##typetag(s, value); \

268 status = tre_stack_push_ ## typetag(s, value);» » » \	256 } while (/CONSTCOND/ 0)

269 }» » » » » » » » » \

270 while (/CONSTCOND/0)

271	257

272 #define STACK_PUSHX(s, typetag, value)» » » » » \	258 #define STACK_PUSHX(s, typetag, value) \

273 {» » » » » » » » » \	259 { \

274 status = tre_stack_push_ ## typetag(s, value);» » » \	260 status = tre_stack_push_##typetag(s, value); \

275 if (status != REG_OK)» » » » » » \	261 if (status != REG_OK) \

276 break;» » » » » » » » \	262 break; \

277 }	263 }

278	264

279 #define STACK_PUSHR(s, typetag, value)» » » » » \	265 #define STACK_PUSHR(s, typetag, value) \

280 {» » » » » » » » » \	266 { \

281 reg_errcode_t _status;» » » » » » \	267 reg_errcode_t _status; \

282 _status = tre_stack_push_ ## typetag(s, value);» » » \	268 _status = tre_stack_push_##typetag(s, value); \

283 if (_status != REG_OK)» » » » » » \	269 if (_status != REG_OK) \

284 return _status;» » » » » » » \	270 return _status; \

285 }	271 }

286	272

287 union tre_stack_item {	273 union tre_stack_item {

288 void *voidptr_value;	274 void* voidptr_value;

289 int int_value;	275 int int_value;

290 };	276 };

291	277

292 struct tre_stack_rec {	278 struct tre_stack_rec {

293 int size;	279 int size;

294 int max_size;	280 int max_size;

295 int increment;	281 int increment;

296 int ptr;	282 int ptr;

297 union tre_stack_item *stack;	283 union tre_stack_item* stack;

298 };	284 };

299	285

300	286 static tre_stack_t* tre_stack_new(int size, int max_size, int increment) {

301 static tre_stack_t *	287 tre_stack_t* s;

302 tre_stack_new(int size, int max_size, int increment)

303 {

304 tre_stack_t *s;

305	288

306 s = xmalloc(sizeof(*s));	289 s = xmalloc(sizeof(*s));

307 if (s != NULL)	290 if (s != NULL) {

308 {	291 s->stack = xmalloc(sizeof(s->stack) size);

309 s->stack = xmalloc(sizeof(s->stack) size);	292 if (s->stack == NULL) {

310 if (s->stack == NULL)	293 xfree(s);

311 » {	294 return NULL;

312 » xfree(s);

313 » return NULL;

314 » }

315 s->size = size;

316 s->max_size = max_size;

317 s->increment = increment;

318 s->ptr = 0;

319 }	295 }

	296 s->size = size;

	297 s->max_size = max_size;

	298 s->increment = increment;

	299 s->ptr = 0;

	300 }

320 return s;	301 return s;

321 }	302 }

322	303

323 static void	304 static void tre_stack_destroy(tre_stack_t* s) {

324 tre_stack_destroy(tre_stack_t *s)

325 {

326 xfree(s->stack);	305 xfree(s->stack);

327 xfree(s);	306 xfree(s);

328 }	307 }

329	308

330 static int	309 static int tre_stack_num_objects(tre_stack_t* s) {

331 tre_stack_num_objects(tre_stack_t *s)

332 {

333 return s->ptr;	310 return s->ptr;

334 }	311 }

335	312

336 static reg_errcode_t	313 static reg_errcode_t tre_stack_push(tre_stack_t* s,

337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)	314 union tre_stack_item value) {

338 {	315 if (s->ptr < s->size) {

339 if (s->ptr < s->size)	316 s->stack[s->ptr] = value;

340 {	317 s->ptr++;

341 s->stack[s->ptr] = value;	318 } else {

342 s->ptr++;	319 if (s->size >= s->max_size) {

	320 return REG_ESPACE;

	321 } else {

	322 union tre_stack_item* new_buffer;

	323 int new_size;

	324 new_size = s->size + s->increment;

	325 if (new_size > s->max_size)

	326 new_size = s->max_size;

	327 new_buffer = xrealloc(s->stack, sizeof(new_buffer) new_size);

	328 if (new_buffer == NULL) {

	329 return REG_ESPACE;

	330 }

	331 assert(new_size > s->size);

	332 s->size = new_size;

	333 s->stack = new_buffer;

	334 tre_stack_push(s, value);

343 }	335 }

344 else	336 }

345 {

346 if (s->size >= s->max_size)

347 » {

348 » return REG_ESPACE;

349 » }

350 else

351 » {

352 » union tre_stack_item *new_buffer;

353 » int new_size;

354 » new_size = s->size + s->increment;

355 » if (new_size > s->max_size)

356 » new_size = s->max_size;

357 » new_buffer = xrealloc(s->stack, sizeof(new_buffer) new_size);

358 » if (new_buffer == NULL)

359 » {

360 » return REG_ESPACE;

361 » }

362 » assert(new_size > s->size);

363 » s->size = new_size;

364 » s->stack = new_buffer;

365 » tre_stack_push(s, value);

366 » }

367 }

368 return REG_OK;	337 return REG_OK;

369 }	338 }

370	339

371 #define define_pushf(typetag, type) \	340 #define define_pushf(typetag, type) \

372 declare_pushf(typetag, type) { \	341 declare_pushf(typetag, type) { \

373 union tre_stack_item item;» \	342 union tre_stack_item item; \

374 item.typetag ## _value = value; \	343 item.typetag##_value = value; \

375 return tre_stack_push(s, item); \	344 return tre_stack_push(s, item); \

376 }

377

378 define_pushf(int, int)

379 define_pushf(voidptr, void *)

380

381 #define define_popf(typetag, type)» » \

382 declare_popf(typetag, type) {»» » \

383 return s->stack[--s->ptr].typetag ## _value; \

384 }	345 }

385	346

386 define_popf(int, int)	347 define_pushf(int, int) define_pushf(voidptr, void*)

387 define_popf(voidptr, void *)	348 #define define_popf(typetag, type) \

	349 declare_popf(typetag, type) { return s->stack[--s->ptr].typetag##_value; }

388	350

	351 define_popf(int, int) define_popf(voidptr, void*)

389	352

390 /***********************************************************************	353 /***********************************************************************

391 from tre-parse.c and tre-parse.h	354 from tre-parse.c and tre-parse.h

392 ***********************************************************************/	355 ***********************************************************************/

393	356

394 /* Parse context. */	357 /* Parse context. */

395 typedef struct {	358 typedef struct {

396 » /* Memory allocator. The AST is allocated using this. */	359 /* Memory allocator. The AST is allocated using this. */

397 » tre_mem_t mem;	360 tre_mem_t mem;

398 » /* Stack used for keeping track of regexp syntax. */	361 /* Stack used for keeping track of regexp syntax. */

399 » tre_stack_t *stack;	362 tre_stack_t* stack;

400 » /* The parsed node after a parse function returns. */	363 /* The parsed node after a parse function returns. */

401 » tre_ast_node_t *n;	364 tre_ast_node_t* n;

402 » /* Position in the regexp pattern after a parse function returns. */	365 /* Position in the regexp pattern after a parse function returns. */

403 » const char *s;	366 const char* s;

404 » /* The first character of the regexp. */	367 /* The first character of the regexp. */

405 » const char *re;	368 const char* re;

406 » /* Current submatch ID. */	369 /* Current submatch ID. */

407 » int submatch_id;	370 int submatch_id;

408 » /* Current position (number of literal). */	371 /* Current position (number of literal). */

409 » int position;	372 int position;

410 » /* The highest back reference or -1 if none seen so far. */	373 /* The highest back reference or -1 if none seen so far. */

411 » int max_backref;	374 int max_backref;

412 » /* Compilation flags. */	375 /* Compilation flags. */

413 » int cflags;	376 int cflags;

414 } tre_parse_ctx_t;	377 } tre_parse_ctx_t;

415	378

416 /* Some macros for expanding \w, \s, etc. */	379 /* Some macros for expanding \w, \s, etc. */

417 static const struct {	380 static const struct {

418 » char c;	381 char c;

419 » const char *expansion;	382 const char* expansion;

420 } tre_macros[] = {	383 } tre_macros[] = {{'t', "\t"},

421 » {'t', "\t"}, {'n', "\n"}, {'r', "\r"},	384 {'n', "\n"},

422 » {'f', "\f"}, {'a', "\a"}, {'e', "\033"},	385 {'r', "\r"},

423 » {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},	386 {'f', "\f"},

424 » {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},	387 {'a', "\a"},

425 » { 0, 0 }	388 {'e', "\033"},

426 };	389 {'w', "[[:alnum:]_]"},

	390 {'W', "[^[:alnum:]_]"},

	391 {'s', "[[:space:]]"},

	392 {'S', "[^[:space:]]"},

	393 {'d', "[[:digit:]]"},

	394 {'D', "[^[:digit:]]"},

	395 {0, 0}};

427	396

428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which	397 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which

429 must have at least `len' items. Sets buf[0] to zero if the there	398 must have at least `len' items. Sets buf[0] to zero if the there

430 is no match in `tre_macros'. */	399 is no match in `tre_macros'. */

431 static const char tre_expand_macro(const char s)	400 static const char* tre_expand_macro(const char* s) {

432 {	401 int i;

433 » int i;	402 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++)

434 » for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);	403 ;

435 » return tre_macros[i].expansion;	404 return tre_macros[i].expansion;

436 }	405 }

437	406

438 static int	407 static int tre_compare_lit(const void* a, const void* b) {

439 tre_compare_lit(const void a, const void b)	408 const tre_literal_t* const* la = a;

440 {	409 const tre_literal_t* const* lb = b;

441 » const tre_literal_t const la = a;	410 /* assumes the range of valid code_min is < INT_MAX */

442 » const tre_literal_t const lb = b;	411 return la[0]->code_min - lb[0]->code_min;

443 » /* assumes the range of valid code_min is < INT_MAX */

444 » return la[0]->code_min - lb[0]->code_min;

445 }	412 }

446	413

447 struct literals {	414 struct literals {

448 » tre_mem_t mem;	415 tre_mem_t mem;

449 » tre_literal_t **a;	416 tre_literal_t** a;

450 » int len;	417 int len;

451 » int cap;	418 int cap;

452 };	419 };

453	420

454 static tre_literal_t tre_new_lit(struct literals p)	421 static tre_literal_t* tre_new_lit(struct literals* p) {

455 {	422 tre_literal_t** a;

456 » tre_literal_t **a;	423 if (p->len >= p->cap) {

457 » if (p->len >= p->cap) {	424 if (p->cap >= 1 << 15)

458 » » if (p->cap >= 1<<15)	425 return 0;

459 » » » return 0;	426 p->cap *= 2;

460 » » p->cap *= 2;	427 a = xrealloc(p->a, p->cap * sizeof *p->a);

461 » » a = xrealloc(p->a, p->cap * sizeof *p->a);	428 if (!a)

462 » » if (!a)	429 return 0;

463 » » » return 0;	430 p->a = a;

464 » » p->a = a;	431 }

465 » }	432 a = p->a + p->len++;

466 » a = p->a + p->len++;	433 a = tre_mem_calloc(p->mem, sizeof *a);

467 » a = tre_mem_calloc(p->mem, sizeof *a);	434 return *a;

468 » return *a;

469 }	435 }

470	436

471 static int add_icase_literals(struct literals *ls, int min, int max)	437 static int add_icase_literals(struct literals* ls, int min, int max) {

472 {	438 tre_literal_t* lit;

473 » tre_literal_t *lit;	439 int b, e, c;

474 » int b, e, c;	440 for (c = min; c <= max;) {

475 » for (c=min; c<=max; ) {	441 /* assumes islower(c) and isupper(c) are exclusive

476 » » /* assumes islower(c) and isupper(c) are exclusive	442 and toupper(c)!=c if islower(c).

477 » » and toupper(c)!=c if islower(c).	443 multiple opposite case characters are not supported */

478 » » multiple opposite case characters are not supported */	444 if (tre_islower(c)) {

479 » » if (tre_islower(c)) {	445 b = e = tre_toupper(c);

480 » » » b = e = tre_toupper(c);	446 for (c++, e++; c <= max; c++, e++)

481 » » » for (c++, e++; c<=max; c++, e++)	447 if (tre_toupper(c) != e)

482 » » » » if (tre_toupper(c) != e) break;	448 break;

483 » » } else if (tre_isupper(c)) {	449 } else if (tre_isupper(c)) {

484 » » » b = e = tre_tolower(c);	450 b = e = tre_tolower(c);

485 » » » for (c++, e++; c<=max; c++, e++)	451 for (c++, e++; c <= max; c++, e++)

486 » » » » if (tre_tolower(c) != e) break;	452 if (tre_tolower(c) != e)

487 » » } else {	453 break;

488 » » » c++;	454 } else {

489 » » » continue;	455 c++;

490 » » }	456 continue;

491 » » lit = tre_new_lit(ls);	457 }

492 » » if (!lit)	458 lit = tre_new_lit(ls);

493 » » » return -1;	459 if (!lit)

494 » » lit->code_min = b;	460 return -1;

495 » » lit->code_max = e-1;	461 lit->code_min = b;

496 » » lit->position = -1;	462 lit->code_max = e - 1;

497 » }	463 lit->position = -1;

498 » return 0;	464 }

	465 return 0;

499 }	466 }

500	467

501

502 /* Maximum number of character classes in a negated bracket expression. */	468 /* Maximum number of character classes in a negated bracket expression. */

503 #define MAX_NEG_CLASSES 64	469 #define MAX_NEG_CLASSES 64

504	470

505 struct neg {	471 struct neg {

506 » int negate;	472 int negate;

507 » int len;	473 int len;

508 » tre_ctype_t a[MAX_NEG_CLASSES];	474 tre_ctype_t a[MAX_NEG_CLASSES];

509 };	475 };

510	476

511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges	477 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges

512	478

513 /*	479 /*

514 bracket grammar:	480 bracket grammar:

515 Bracket = '[' List ']' \| '[^' List ']'	481 Bracket = '[' List ']' \| '[^' List ']'

516 List = Term \| List Term	482 List = Term \| List Term

517 Term = Char \| Range \| Chclass \| Eqclass	483 Term = Char \| Range \| Chclass \| Eqclass

518 Range = Char '-' Char \| Char '-' '-'	484 Range = Char '-' Char \| Char '-' '-'

519 Char = Coll \| coll_single	485 Char = Coll \| coll_single

520 Meta = ']' \| '-'	486 Meta = ']' \| '-'

521 Coll = '[.' coll_single '.]' \| '[.' coll_multi '.]' \| '[.' Meta '.]'	487 Coll = '[.' coll_single '.]' \| '[.' coll_multi '.]' \| '[.' Meta '.]'

522 Eqclass = '[=' coll_single '=]' \| '[=' coll_multi '=]'	488 Eqclass = '[=' coll_single '=]' \| '[=' coll_multi '=]'

523 Chclass = '[:' class ':]'	489 Chclass = '[:' class ':]'

524	490

525 coll_single is a single char collating element but it can be	491 coll_single is a single char collating element but it can be

526 '-' only at the beginning or end of a List and	492 '-' only at the beginning or end of a List and

527 ']' only at the beginning of a List and	493 ']' only at the beginning of a List and

528 '^' anywhere except after the openning '['	494 '^' anywhere except after the openning '['

529 */	495 */

530	496

531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t ctx, const char s, st ruct literals ls, struct neg neg)	497 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t* ctx,

532 {	498 const char* s,

533 » const char *start = s;	499 struct literals* ls,

534 » tre_ctype_t class;	500 struct neg* neg) {

535 » int min, max;	501 const char* start = s;

536 » wchar_t wc;	502 tre_ctype_t class;

537 » int len;	503 int min, max;

538	504 wchar_t wc;

539 » for (;;) {	505 int len;

540 » » class = 0;	506

541 » » len = mbtowc(&wc, s, -1);	507 for (;;) {

542 » » if (len <= 0)	508 class = 0;

543 » » » return *s ? REG_BADPAT : REG_EBRACK;	509 len = mbtowc(&wc, s, -1);

544 » » if (*s == ']' && s != start) {	510 if (len <= 0)

545 » » » ctx->s = s+1;	511 return *s ? REG_BADPAT : REG_EBRACK;

546 » » » return REG_OK;	512 if (*s == ']' && s != start) {

547 » » }	513 ctx->s = s + 1;

548 » » if (*s == '-' && s != start && s[1] != ']' &&	514 return REG_OK;

549 » » /* extension: [a-z--@] is accepted as [a-z]\|[--@] */	515 }

550 » » (s[1] != '-' \|\| s[2] == ']'))	516 if (*s == '-' && s != start && s[1] != ']' &&

551 » » » return REG_ERANGE;	517 /* extension: [a-z--@] is accepted as [a-z]\|[--@] */

552 » » if (*s == '[' && (s[1] == '.' \|\| s[1] == '='))	518 (s[1] != '-' \|\| s[2] == ']'))

553 » » » /* collating symbols and equivalence classes are not sup ported */	519 return REG_ERANGE;

554 » » » return REG_ECOLLATE;	520 if (*s == '[' && (s[1] == '.' \|\| s[1] == '='))

555 » » if (*s == '[' && s[1] == ':') {	521 /* collating symbols and equivalence classes are not supported */

556 » » » char tmp[CHARCLASS_NAME_MAX+1];	522 return REG_ECOLLATE;

557 » » » s += 2;	523 if (*s == '[' && s[1] == ':') {

558 » » » for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {	524 char tmp[CHARCLASS_NAME_MAX + 1];

559 » » » » if (s[len] == ':') {	525 s += 2;

560 » » » » » memcpy(tmp, s, len);	526 for (len = 0; len < CHARCLASS_NAME_MAX && s[len]; len++) {

561 » » » » » tmp[len] = 0;	527 if (s[len] == ':') {

562 » » » » » class = tre_ctype(tmp);	528 memcpy(tmp, s, len);

563 » » » » » break;	529 tmp[len] = 0;

564 » » » » }	530 class = tre_ctype(tmp);

565 » » » }	531 break;

566 » » » if (!class \|\| s[len+1] != ']')	532 }

567 » » » » return REG_ECTYPE;	533 }

568 » » » min = 0;	534 if (!class \|\| s[len + 1] != ']')

569 » » » max = TRE_CHAR_MAX;	535 return REG_ECTYPE;

570 » » » s += len+2;	536 min = 0;

571 » » } else {	537 max = TRE_CHAR_MAX;

572 » » » min = max = wc;	538 s += len + 2;

573 » » » s += len;	539 } else {

574 » » » if (*s == '-' && s[1] != ']') {	540 min = max = wc;

575 » » » » s++;	541 s += len;

576 » » » » len = mbtowc(&wc, s, -1);	542 if (*s == '-' && s[1] != ']') {

577 » » » » max = wc;	543 s++;

578 » » » » /* XXX - Should use collation order instead of	544 len = mbtowc(&wc, s, -1);

579 » » » » encoding values in character ranges. */	545 max = wc;

580 » » » » if (len <= 0 \|\| min > max)	546 /* XXX - Should use collation order instead of

581 » » » » » return REG_ERANGE;	547 encoding values in character ranges. */

582 » » » » s += len;	548 if (len <= 0 \|\| min > max)

583 » » » }	549 return REG_ERANGE;

584 » » }	550 s += len;

585	551 }

586 » » if (class && neg->negate) {	552 }

587 » » » if (neg->len >= MAX_NEG_CLASSES)	553

588 » » » » return REG_ESPACE;	554 if (class && neg->negate) {

589 » » » neg->a[neg->len++] = class;	555 if (neg->len >= MAX_NEG_CLASSES)

590 » » } else {	556 return REG_ESPACE;

591 » » » tre_literal_t *lit = tre_new_lit(ls);	557 neg->a[neg->len++] = class;

592 » » » if (!lit)	558 } else {

593 » » » » return REG_ESPACE;	559 tre_literal_t* lit = tre_new_lit(ls);

594 » » » lit->code_min = min;	560 if (!lit)

595 » » » lit->code_max = max;	561 return REG_ESPACE;

596 » » » lit->class = class;	562 lit->code_min = min;

597 » » » lit->position = -1;	563 lit->code_max = max;

598	564 lit->class = class;

599 » » » /* Add opposite-case codepoints if REG_ICASE is present.	565 lit->position = -1;

600 » » » It seems that POSIX requires that bracket negation	566

601 » » » should happen before case-folding, but most practical	567 /* Add opposite-case codepoints if REG_ICASE is present.

602 » » » implementations do it the other way around. Changing	568 It seems that POSIX requires that bracket negation

603 » » » the order would need efficient representation of	569 should happen before case-folding, but most practical

604 » » » case-fold ranges and bracket range sets even with	570 implementations do it the other way around. Changing

605 » » » simple patterns so this is ok for now. */	571 the order would need efficient representation of

606 » » » if (ctx->cflags & REG_ICASE && !class)	572 case-fold ranges and bracket range sets even with

607 » » » » if (add_icase_literals(ls, min, max))	573 simple patterns so this is ok for now. */

608 » » » » » return REG_ESPACE;	574 if (ctx->cflags & REG_ICASE && !class)

609 » » }	575 if (add_icase_literals(ls, min, max))

610 » }	576 return REG_ESPACE;

611 }	577 }

612	578 }

613 static reg_errcode_t parse_bracket(tre_parse_ctx_t ctx, const char s)	579 }

614 {	580

615 » int i, max, min, negmax, negmin;	581 static reg_errcode_t parse_bracket(tre_parse_ctx_t* ctx, const char* s) {

616 » tre_ast_node_t node = 0, n;	582 int i, max, min, negmax, negmin;

617 » tre_ctype_t *nc = 0;	583 tre_ast_node_t node = 0, n;

618 » tre_literal_t *lit;	584 tre_ctype_t* nc = 0;

619 » struct literals ls;	585 tre_literal_t* lit;

620 » struct neg neg;	586 struct literals ls;

621 » reg_errcode_t err;	587 struct neg neg;

622	588 reg_errcode_t err;

623 » ls.mem = ctx->mem;	589

624 » ls.len = 0;	590 ls.mem = ctx->mem;

625 » ls.cap = 32;	591 ls.len = 0;

626 » ls.a = xmalloc(ls.cap * sizeof *ls.a);	592 ls.cap = 32;

627 » if (!ls.a)	593 ls.a = xmalloc(ls.cap * sizeof *ls.a);

628 » » return REG_ESPACE;	594 if (!ls.a)

629 » neg.len = 0;	595 return REG_ESPACE;

630 » neg.negate = *s == '^';	596 neg.len = 0;

631 » if (neg.negate)	597 neg.negate = *s == '^';

632 » » s++;	598 if (neg.negate)

633	599 s++;

634 » err = parse_bracket_terms(ctx, s, &ls, &neg);	600

635 » if (err != REG_OK)	601 err = parse_bracket_terms(ctx, s, &ls, &neg);

636 » » goto parse_bracket_done;	602 if (err != REG_OK)

637	603 goto parse_bracket_done;

638 » if (neg.negate) {	604

639 » » /* Sort the array if we need to negate it. */	605 if (neg.negate) {

640 » » qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);	606 /* Sort the array if we need to negate it. */

641 » » /* extra lit for the last negated range */	607 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);

642 » » lit = tre_new_lit(&ls);	608 /* extra lit for the last negated range */

643 » » if (!lit) {	609 lit = tre_new_lit(&ls);

644 » » » err = REG_ESPACE;	610 if (!lit) {

645 » » » goto parse_bracket_done;	611 err = REG_ESPACE;

646 » » }	612 goto parse_bracket_done;

647 » » lit->code_min = TRE_CHAR_MAX+1;	613 }

648 » » lit->code_max = TRE_CHAR_MAX+1;	614 lit->code_min = TRE_CHAR_MAX + 1;

649 » » lit->position = -1;	615 lit->code_max = TRE_CHAR_MAX + 1;

650 » » /* negated classes */	616 lit->position = -1;

651 » » if (neg.len) {	617 /* negated classes */

652 » » » nc = tre_mem_alloc(ctx->mem, (neg.len+1)sizeof neg.a);	618 if (neg.len) {

653 » » » if (!nc) {	619 nc = tre_mem_alloc(ctx->mem, (neg.len + 1) * sizeof *neg.a);

654 » » » » err = REG_ESPACE;	620 if (!nc) {

655 » » » » goto parse_bracket_done;	621 err = REG_ESPACE;

656 » » » }	622 goto parse_bracket_done;

657 » » » memcpy(nc, neg.a, neg.lensizeof neg.a);	623 }

658 » » » nc[neg.len] = 0;	624 memcpy(nc, neg.a, neg.len * sizeof *neg.a);

659 » » }	625 nc[neg.len] = 0;

660 » }	626 }

661	627 }

662 » /* Build a union of the items in the array, negated if necessary. */	628

663 » negmax = negmin = 0;	629 /* Build a union of the items in the array, negated if necessary. */

664 » for (i = 0; i < ls.len; i++) {	630 negmax = negmin = 0;

665 » » lit = ls.a[i];	631 for (i = 0; i < ls.len; i++) {

666 » » min = lit->code_min;	632 lit = ls.a[i];

667 » » max = lit->code_max;	633 min = lit->code_min;

668 » » if (neg.negate) {	634 max = lit->code_max;

669 » » » if (min <= negmin) {	635 if (neg.negate) {

670 » » » » /* Overlap. */	636 if (min <= negmin) {

671 » » » » negmin = MAX(max + 1, negmin);	637 /* Overlap. */

672 » » » » continue;	638 negmin = MAX(max + 1, negmin);

673 » » » }	639 continue;

674 » » » negmax = min - 1;	640 }

675 » » » lit->code_min = negmin;	641 negmax = min - 1;

676 » » » lit->code_max = negmax;	642 lit->code_min = negmin;

677 » » » negmin = max + 1;	643 lit->code_max = negmax;

678 » » }	644 negmin = max + 1;

679 » » lit->position = ctx->position;	645 }

680 » » lit->neg_classes = nc;	646 lit->position = ctx->position;

681 » » n = tre_ast_new_node(ctx->mem, LITERAL, lit);	647 lit->neg_classes = nc;

682 » » node = tre_ast_new_union(ctx->mem, node, n);	648 n = tre_ast_new_node(ctx->mem, LITERAL, lit);

683 » » if (!node) {	649 node = tre_ast_new_union(ctx->mem, node, n);

684 » » » err = REG_ESPACE;	650 if (!node) {

685 » » » break;	651 err = REG_ESPACE;

686 » » }	652 break;

687 » }	653 }

	654 }

688	655

689 parse_bracket_done:	656 parse_bracket_done:

690 » xfree(ls.a);	657 xfree(ls.a);

691 » ctx->position++;	658 ctx->position++;

692 » ctx->n = node;	659 ctx->n = node;

693 » return err;	660 return err;

694 }	661 }

695	662

696 static const char parse_dup_count(const char s, int *n)	663 static const char* parse_dup_count(const char* s, int* n) {

697 {	664 *n = -1;

698 » *n = -1;	665 if (!isdigit(*s))

699 » if (!isdigit(*s))	666 return s;

700 » » return s;	667 *n = 0;

701 » *n = 0;	668 for (;;) {

702 » for (;;) {	669 n = 10 n + (s - '0');

703 » » n = 10 n + (s - '0');	670 s++;

704 » » s++;	671 if (!isdigit(s) \|\| n > RE_DUP_MAX)

705 » » if (!isdigit(s) \|\| n > RE_DUP_MAX)	672 break;

706 » » » break;	673 }

707 » }	674 return s;

708 » return s;	675 }

709 }	676

710	677 static const char* parse_dup(const char* s, int ere, int* pmin, int* pmax) {

711 static const char parse_dup(const char s, int ere, int pmin, int pmax)	678 int min, max;

712 {	679

713 » int min, max;	680 s = parse_dup_count(s, &min);

714	681 if (*s == ',')

715 » s = parse_dup_count(s, &min);	682 s = parse_dup_count(s + 1, &max);

716 » if (*s == ',')	683 else

717 » » s = parse_dup_count(s+1, &max);	684 max = min;

718 » else	685

719 » » max = min;	686 if ((max < min && max >= 0) \|\| max > RE_DUP_MAX \|\| min > RE_DUP_MAX \|\|

720	687 min < 0 \|\| (!ere && s++ != '\\') \|\| s++ != '}')

721 » if (	688 return 0;

722 » » (max < min && max >= 0) \|\|	689 *pmin = min;

723 » » max > RE_DUP_MAX \|\|	690 *pmax = max;

724 » » min > RE_DUP_MAX \|\|	691 return s;

725 » » min < 0 \|\|	692 }

726 » » (!ere && *s++ != '\\') \|\|	693

727 » » *s++ != '}'	694 static int hexval(unsigned c) {

728 » )	695 if (c - '0' < 10)

729 » » return 0;	696 return c - '0';

730 » *pmin = min;	697 c \|= 32;

731 » *pmax = max;	698 if (c - 'a' < 6)

732 » return s;	699 return c - 'a' + 10;

733 }	700 return -1;

734	701 }

735 static int hexval(unsigned c)	702

736 {	703 static reg_errcode_t marksub(tre_parse_ctx_t* ctx,

737 » if (c-'0'<10) return c-'0';	704 tre_ast_node_t* node,

738 » c \|= 32;	705 int subid) {

739 » if (c-'a'<6) return c-'a'+10;	706 if (node->submatch_id >= 0) {

740 » return -1;	707 tre_ast_node_t* n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

741 }	708 if (!n)

742	709 return REG_ESPACE;

743 static reg_errcode_t marksub(tre_parse_ctx_t ctx, tre_ast_node_t node, int sub id)	710 n = tre_ast_new_catenation(ctx->mem, n, node);

744 {	711 if (!n)

745 » if (node->submatch_id >= 0) {	712 return REG_ESPACE;

746 » » tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1) ;	713 n->num_submatches = node->num_submatches;

747 » » if (!n)	714 node = n;

748 » » » return REG_ESPACE;	715 }

749 » » n = tre_ast_new_catenation(ctx->mem, n, node);	716 node->submatch_id = subid;

750 » » if (!n)	717 node->num_submatches++;

751 » » » return REG_ESPACE;	718 ctx->n = node;

752 » » n->num_submatches = node->num_submatches;	719 return REG_OK;

753 » » node = n;

754 » }

755 » node->submatch_id = subid;

756 » node->num_submatches++;

757 » ctx->n = node;

758 » return REG_OK;

759 }	720 }

760	721

761 /*	722 /*

762 BRE grammar:	723 BRE grammar:

763 Regex = Branch \| '^' \| '$' \| '^$' \| '^' Branch \| Branch '$' \| '^' Branch '$'	724 Regex = Branch \| '^' \| '$' \| '^$' \| '^' Branch \| Branch '$' \| '^'

	725 Branch '$'

764 Branch = Atom \| Branch Atom	726 Branch = Atom \| Branch Atom

765 Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '$' Branch '$' \| back_ref	727 Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '\(' Branch

766 Dup = '*' \| '\{' Count '\}' \| '\{' Count ',\}' \| '\{' Count ',' Count '\}'	728 '\)' \| back_ref

	729 Dup = '*' \| '\{' Count '\}' \| '\{' Count ',\}' \| '\{' Count ',' Count

	730 '\}'

767	731

768 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)	732 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)

769	733

770 ERE grammar:	734 ERE grammar:

771 Regex = Branch \| Regex '\|' Branch	735 Regex = Branch \| Regex '\|' Branch

772 Branch = Atom \| Branch Atom	736 Branch = Atom \| Branch Atom

773 Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '(' Regex ') ' \| '^' \| '$'	737 Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '(' Regex

774 Dup = '*' \| '+' \| '?' \| '{' Count '}' \| '{' Count ',}' \| '{' Count ',' Count '}'	738 ')' \| '^' \| '$'

	739 Dup = '*' \| '+' \| '?' \| '{' Count '}' \| '{' Count ',}' \| '{' Count

	740 ',' Count '}'

775	741

776 (a+?, ^, $+, \X, {, (\|a) are unspecified)	742 (a+?, ^, $+, \X, {, (\|a) are unspecified)

777 */	743 */

778	744

779 static reg_errcode_t parse_atom(tre_parse_ctx_t ctx, const char s)	745 static reg_errcode_t parse_atom(tre_parse_ctx_t* ctx, const char* s) {

780 {	746 int len, ere = ctx->cflags & REG_EXTENDED;

781 » int len, ere = ctx->cflags & REG_EXTENDED;	747 const char* p;

782 » const char *p;	748 tre_ast_node_t* node;

783 » tre_ast_node_t *node;	749 wchar_t wc;

784 » wchar_t wc;	750 switch (*s) {

785 » switch (*s) {	751 case '[':

786 » case '[':	752 return parse_bracket(ctx, s + 1);

787 » » return parse_bracket(ctx, s+1);	753 case '\\':

788 » case '\\':	754 p = tre_expand_macro(s + 1);

789 » » p = tre_expand_macro(s+1);	755 if (p) {

790 » » if (p) {	756 /* assume \X expansion is a single atom */

791 » » » /* assume \X expansion is a single atom */	757 reg_errcode_t err = parse_atom(ctx, p);

792 » » » reg_errcode_t err = parse_atom(ctx, p);	758 ctx->s = s + 2;

793 » » » ctx->s = s+2;	759 return err;

794 » » » return err;	760 }

795 » » }	761 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */

796 » » /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */	762 switch (*++s) {

797 » » switch (*++s) {	763 case 0:

798 » » case 0:	764 return REG_EESCAPE;

799 » » » return REG_EESCAPE;	765 case 'b':

800 » » case 'b':	766 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);

801 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_WB, -1);	767 break;

802 » » » break;	768 case 'B':

803 » » case 'B':	769 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);

804 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_WB_NEG, -1);	770 break;

805 » » » break;	771 case '<':

806 » » case '<':	772 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);

807 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_BOW, -1);	773 break;

808 » » » break;	774 case '>':

809 » » case '>':	775 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);

810 » » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_A T_EOW, -1);	776 break;

811 » » » break;	777 case 'x':

812 » » case 'x':	778 s++;

813 » » » s++;	779 int i, v = 0, c;

814 » » » int i, v = 0, c;	780 len = 2;

815 » » » len = 2;	781 if (*s == '{') {

816 » » » if (*s == '{') {	782 len = 8;

817 » » » » len = 8;	783 s++;

818 » » » » s++;	784 }

819 » » » }	785 for (i = 0; i < len && v < 0x110000; i++) {

820 » » » for (i=0; i<len && v<0x110000; i++) {	786 c = hexval(s[i]);

821 » » » » c = hexval(s[i]);	787 if (c < 0)

822 » » » » if (c < 0) break;	788 break;

823 » » » » v = 16*v + c;	789 v = 16 * v + c;

824 » » » }	790 }

825 » » » s += i;	791 s += i;

826 » » » if (len == 8) {	792 if (len == 8) {

827 » » » » if (*s != '}')	793 if (*s != '}')

828 » » » » » return REG_EBRACE;	794 return REG_EBRACE;

829 » » » » s++;	795 s++;

830 » » » }	796 }

831 » » » node = tre_ast_new_literal(ctx->mem, v, v, ctx->position ++);	797 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);

832 » » » s--;	798 s--;

833 » » » break;	799 break;

834 » » case '{':	800 case '{':

835 » » case '+':	801 case '+':

836 » » case '?':	802 case '?':

837 » » » /* extension: treat \+, \? as repetitions in BRE */	803 /* extension: treat \+, \? as repetitions in BRE */

838 » » » /* reject repetitions after empty expression in BRE */	804 /* reject repetitions after empty expression in BRE */

839 » » » if (!ere)	805 if (!ere)

840 » » » » return REG_BADRPT;	806 return REG_BADRPT;

841 » » case '\|':	807 case '\|':

842 » » » /* extension: treat \\| as alternation in BRE */	808 /* extension: treat \\| as alternation in BRE */

843 » » » if (!ere) {	809 if (!ere) {

844 » » » » node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);	810 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

845 » » » » s--;	811 s--;

846 » » » » goto end;	812 goto end;

847 » » » }	813 }

848 » » » /* fallthrough */	814 /* fallthrough */

849 » » default:	815 default:

850 » » » if (!ere && (unsigned)*s-'1' < 9) {	816 if (!ere && (unsigned)*s - '1' < 9) {

851 » » » » /* back reference */	817 /* back reference */

852 » » » » int val = *s - '0';	818 int val = *s - '0';

853 » » » » node = tre_ast_new_literal(ctx->mem, BACKREF, va l, ctx->position++);	819 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);

854 » » » » ctx->max_backref = MAX(val, ctx->max_backref);	820 ctx->max_backref = MAX(val, ctx->max_backref);

855 » » » } else {	821 } else {

856 » » » » /* extension: accept unknown escaped char	822 /* extension: accept unknown escaped char

857 » » » » as a literal */	823 as a literal */

858 » » » » goto parse_literal;	824 goto parse_literal;

859 » » » }	825 }

860 » » }	826 }

861 » » s++;	827 s++;

862 » » break;	828 break;

863 » case '.':	829 case '.':

864 » » if (ctx->cflags & REG_NEWLINE) {	830 if (ctx->cflags & REG_NEWLINE) {

865 » » » tre_ast_node_t tmp1, tmp2;	831 tre_ast_node_t tmp1, tmp2;

866 » » » tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos ition++);	832 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n' - 1, ctx->position++);

867 » » » tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA X, ctx->position++);	833 tmp2 = tre_ast_new_literal(ctx->mem, '\n' + 1, TRE_CHAR_MAX,

868 » » » if (tmp1 && tmp2)	834 ctx->position++);

869 » » » » node = tre_ast_new_union(ctx->mem, tmp1, tmp2);	835 if (tmp1 && tmp2)

870 » » » else	836 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);

871 » » » » node = 0;	837 else

872 » » } else {	838 node = 0;

873 » » » node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ct x->position++);	839 } else {

874 » » }	840 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);

875 » » s++;	841 }

876 » » break;	842 s++;

877 » case '^':	843 break;

878 » » /* '^' has a special meaning everywhere in EREs, and at beginnin g of BRE. */	844 case '^':

879 » » if (!ere && s != ctx->re)	845 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE.

880 » » » goto parse_literal;	846 */

881 » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, - 1);	847 if (!ere && s != ctx->re)

882 » » s++;	848 goto parse_literal;

883 » » break;	849 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);

884 » case '$':	850 s++;

885 » » /* '$' is special everywhere in EREs, and in the end of the stri ng in BREs. */	851 break;

886 » » if (!ere && s[1])	852 case '$':

887 » » » goto parse_literal;	853 /* '$' is special everywhere in EREs, and in the end of the string in

888 » » node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, - 1);	854 * BREs. */

889 » » s++;	855 if (!ere && s[1])

890 » » break;	856 goto parse_literal;

891 » case '*':	857 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);

892 » » return REG_BADPAT;	858 s++;

893 » case '{':	859 break;

894 » case '+':	860 case '*':

895 » case '?':	861 return REG_BADPAT;

896 » » /* reject repetitions after empty expression in ERE */	862 case '{':

897 » » if (ere)	863 case '+':

898 » » » return REG_BADRPT;	864 case '?':

899 » case '\|':	865 /* reject repetitions after empty expression in ERE */

900 » » if (!ere)	866 if (ere)

901 » » » goto parse_literal;	867 return REG_BADRPT;

902 » case 0:	868 case '\|':

903 » » node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);	869 if (!ere)

904 » » break;	870 goto parse_literal;

905 » default:	871 case 0:

906 parse_literal:	872 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

907 » » len = mbtowc(&wc, s, -1);	873 break;

908 » » if (len < 0)	874 default:

909 » » » return REG_BADPAT;	875 parse_literal:

910 » » if (ctx->cflags & REG_ICASE && (tre_isupper(wc) \|\| tre_islower(w c))) {	876 len = mbtowc(&wc, s, -1);

911 » » » tre_ast_node_t tmp1, tmp2;	877 if (len < 0)

912 » » » /* multiple opposite case characters are not supported * /	878 return REG_BADPAT;

913 » » » tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr e_toupper(wc), ctx->position);	879 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) \|\| tre_islower(wc))) {

914 » » » tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr e_tolower(wc), ctx->position);	880 tre_ast_node_t tmp1, tmp2;

915 » » » if (tmp1 && tmp2)	881 /* multiple opposite case characters are not supported */

916 » » » » node = tre_ast_new_union(ctx->mem, tmp1, tmp2);	882 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc),

917 » » » else	883 ctx->position);

918 » » » » node = 0;	884 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc),

919 » » } else {	885 ctx->position);

920 » » » node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi on);	886 if (tmp1 && tmp2)

921 » » }	887 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);

922 » » ctx->position++;	888 else

923 » » s += len;	889 node = 0;

924 » » break;	890 } else {

925 » }	891 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);

	892 }

	893 ctx->position++;

	894 s += len;

	895 break;

	896 }

926 end:	897 end:

927 » if (!node)	898 if (!node)

928 » » return REG_ESPACE;	899 return REG_ESPACE;

929 » ctx->n = node;	900 ctx->n = node;

930 » ctx->s = s;	901 ctx->s = s;

931 » return REG_OK;	902 return REG_OK;

932 }	903 }

933	904

934 #define PUSHPTR(err, s, v) do { \	905 #define PUSHPTR(err, s, v) \

935 » if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \	906 do { \

936 » » return err; \	907 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \

937 } while(0)	908 return err; \

938	909 } while (0)

939 #define PUSHINT(err, s, v) do { \	910

940 » if ((err = tre_stack_push_int(s, v)) != REG_OK) \	911 #define PUSHINT(err, s, v) \

941 » » return err; \	912 do { \

942 } while(0)	913 if ((err = tre_stack_push_int(s, v)) != REG_OK) \

943	914 return err; \

944 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)	915 } while (0)

945 {	916

946 » tre_ast_node_t nbranch=0, nunion=0;	917 static reg_errcode_t tre_parse(tre_parse_ctx_t* ctx) {

947 » int ere = ctx->cflags & REG_EXTENDED;	918 tre_ast_node_t nbranch = 0, nunion = 0;

948 » const char *s = ctx->re;	919 int ere = ctx->cflags & REG_EXTENDED;

949 » int subid = 0;	920 const char* s = ctx->re;

950 » int depth = 0;	921 int subid = 0;

951 » reg_errcode_t err;	922 int depth = 0;

952 » tre_stack_t *stack = ctx->stack;	923 reg_errcode_t err;

953	924 tre_stack_t* stack = ctx->stack;

954 » PUSHINT(err, stack, subid++);	925

955 » for (;;) {	926 PUSHINT(err, stack, subid++);

956 » » if ((!ere && *s == '\\' && s[1] == '(') \|\|	927 for (;;) {

957 » » (ere && *s == '(')) {	928 if ((!ere && s == '\\' && s[1] == '(') \|\| (ere && s == '(')) {

958 » » » PUSHPTR(err, stack, nunion);	929 PUSHPTR(err, stack, nunion);

959 » » » PUSHPTR(err, stack, nbranch);	930 PUSHPTR(err, stack, nbranch);

960 » » » PUSHINT(err, stack, subid++);	931 PUSHINT(err, stack, subid++);

961 » » » s++;	932 s++;

962 » » » if (!ere)	933 if (!ere)

963 » » » » s++;	934 s++;

964 » » » depth++;	935 depth++;

965 » » » nbranch = nunion = 0;	936 nbranch = nunion = 0;

966 » » » continue;	937 continue;

967 » » }	938 }

968 » » if ((!ere && *s == '\\' && s[1] == ')') \|\|	939 if ((!ere && s == '\\' && s[1] == ')') \|\| (ere && s == ')' && depth)) {

969 » » (ere && *s == ')' && depth)) {	940 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

970 » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);	941 if (!ctx->n)

971 » » » if (!ctx->n)	942 return REG_ESPACE;

972 » » » » return REG_ESPACE;	943 } else {

973 » » } else {	944 err = parse_atom(ctx, s);

974 » » » err = parse_atom(ctx, s);	945 if (err != REG_OK)

975 » » » if (err != REG_OK)	946 return err;

976 » » » » return err;	947 s = ctx->s;

977 » » » s = ctx->s;	948 }

978 » » }	949

979	950 parse_iter:

980 » parse_iter:	951 /* extension: repetitions are rejected after an empty node

981 » » /* extension: repetitions are rejected after an empty node	952 eg. (+), \|*, {2}, but assertions are not treated as empty

982 » » eg. (+), \|*, {2}, but assertions are not treated as empty	953 so ^* or $? are accepted currently. */

983 » » so ^* or $? are accepted currently. */	954 for (;;) {

984 » » for (;;) {	955 int min, max;

985 » » » int min, max;	956

986	957 if (s != '\\' && s != '*') {

987 » » » if (s!='\\' && s!='*') {	958 if (!ere)

988 » » » » if (!ere)	959 break;

989 » » » » » break;	960 if (s != '+' && s != '?' && *s != '{')

990 » » » » if (s!='+' && s!='?' && *s!='{')	961 break;

991 » » » » » break;	962 }

992 » » » }	963 if (*s == '\\' && ere)

993 » » » if (*s=='\\' && ere)	964 break;

994 » » » » break;	965 /* extension: treat \+, \? as repetitions in BRE */

995 » » » /* extension: treat \+, \? as repetitions in BRE */	966 if (*s == '\\' && s[1] != '+' && s[1] != '?' && s[1] != '{')

996 » » » if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')	967 break;

997 » » » » break;	968 if (*s == '\\')

998 » » » if (*s=='\\')	969 s++;

999 » » » » s++;	970

1000	971 /* extension: multiple consecutive *+?{,} is unspecified,

1001 » » » /* extension: multiple consecutive *+?{,} is unspecified ,	972 but (a+)+ has to be supported so accepting a++ makes

1002 » » » but (a+)+ has to be supported so accepting a++ makes	973 sense, note however that the RE_DUP_MAX limit can be

1003 » » » sense, note however that the RE_DUP_MAX limit can be	974 circumvented: (a{255}){255} uses a lot of memory.. */

1004 » » » circumvented: (a{255}){255} uses a lot of memory.. */	975 if (*s == '{') {

1005 » » » if (*s=='{') {	976 s = parse_dup(s + 1, ere, &min, &max);

1006 » » » » s = parse_dup(s+1, ere, &min, &max);	977 if (!s)

1007 » » » » if (!s)	978 return REG_BADBR;

1008 » » » » » return REG_BADBR;	979 } else {

1009 » » » } else {	980 min = 0;

1010 » » » » min=0;	981 max = -1;

1011 » » » » max=-1;	982 if (*s == '+')

1012 » » » » if (*s == '+')	983 min = 1;

1013 » » » » » min = 1;	984 if (*s == '?')

1014 » » » » if (*s == '?')	985 max = 1;

1015 » » » » » max = 1;	986 s++;

1016 » » » » s++;	987 }

1017 » » » }	988 if (max == 0)

1018 » » » if (max == 0)	989 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

1019 » » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1 , -1);	990 else

1020 » » » else	991 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);

1021 » » » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);	992 if (!ctx->n)

1022 » » » if (!ctx->n)	993 return REG_ESPACE;

1023 » » » » return REG_ESPACE;	994 }

1024 » » }	995

1025	996 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);

1026 » » nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);	997 if ((ere && s == '\|') \|\| (ere && s == ')' && depth) \|\|

1027 » » if ((ere && *s == '\|') \|\|	998 (!ere && *s == '\\' && s[1] == ')') \|\|

1028 » » (ere && *s == ')' && depth) \|\|	999 /* extension: treat \\| as alternation in BRE */

1029 » » (!ere && *s == '\\' && s[1] == ')') \|\|	1000 (!ere && s == '\\' && s[1] == '\|') \|\| !s) {

1030 » » /* extension: treat \\| as alternation in BRE */	1001 /* extension: empty branch is unspecified (), (\|a), (a\|)

1031 » » (!ere && *s == '\\' && s[1] == '\|') \|\|	1002 here they are not rejected but match on empty string */

1032 » » !*s) {	1003 int c = *s;

1033 » » » /* extension: empty branch is unspecified (), (\|a), (a\|)	1004 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);

1034 » » » here they are not rejected but match on empty string */	1005 nbranch = 0;

1035 » » » int c = *s;	1006

1036 » » » nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);	1007 if (c == '\\' && s[1] == '\|') {

1037 » » » nbranch = 0;	1008 s += 2;

1038	1009 } else if (c == '\|') {

1039 » » » if (c == '\\' && s[1] == '\|') {	1010 s++;

1040 » » » » s+=2;	1011 } else {

1041 » » » } else if (c == '\|') {	1012 if (c == '\\') {

1042 » » » » s++;	1013 if (!depth)

1043 » » » } else {	1014 return REG_EPAREN;

1044 » » » » if (c == '\\') {	1015 s += 2;

1045 » » » » » if (!depth) return REG_EPAREN;	1016 } else if (c == ')')

1046 » » » » » s+=2;	1017 s++;

1047 » » » » } else if (c == ')')	1018 depth--;

1048 » » » » » s++;	1019 err = marksub(ctx, nunion, tre_stack_pop_int(stack));

1049 » » » » depth--;	1020 if (err != REG_OK)

1050 » » » » err = marksub(ctx, nunion, tre_stack_pop_int(sta ck));	1021 return err;

1051 » » » » if (err != REG_OK)	1022 if (!c && depth < 0) {

1052 » » » » » return err;	1023 ctx->submatch_id = subid;

1053 » » » » if (!c && depth<0) {	1024 return REG_OK;

1054 » » » » » ctx->submatch_id = subid;	1025 }

1055 » » » » » return REG_OK;	1026 if (!c \|\| depth < 0)

1056 » » » » }	1027 return REG_EPAREN;

1057 » » » » if (!c \|\| depth<0)	1028 nbranch = tre_stack_pop_voidptr(stack);

1058 » » » » » return REG_EPAREN;	1029 nunion = tre_stack_pop_voidptr(stack);

1059 » » » » nbranch = tre_stack_pop_voidptr(stack);	1030 goto parse_iter;

1060 » » » » nunion = tre_stack_pop_voidptr(stack);	1031 }

1061 » » » » goto parse_iter;	1032 }

1062 » » » }	1033 }

1063 » » }	1034 }

1064 » }

1065 }

1066

1067	1035

1068 /***********************************************************************	1036 /***********************************************************************

1069 from tre-compile.c	1037 from tre-compile.c

1070 ***********************************************************************/	1038 ***********************************************************************/

1071	1039

1072

1073 /*	1040 /*

1074 TODO:	1041 TODO:

1075 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive	1042 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive

1076 function calls.	1043 function calls.

1077 */	1044 */

1078	1045

1079 /*	1046 /*

1080 Algorithms to setup tags so that submatch addressing can be done.	1047 Algorithms to setup tags so that submatch addressing can be done.

1081 */	1048 */

1082	1049

1083

1084 /* Inserts a catenation node to the root of the tree given in `node'.	1050 /* Inserts a catenation node to the root of the tree given in `node'.

1085 As the left child a new tag with number `tag_id' to `node' is added,	1051 As the left child a new tag with number `tag_id' to `node' is added,

1086 and the right child is the old root. */	1052 and the right child is the old root. */

1087 static reg_errcode_t	1053 static reg_errcode_t tre_add_tag_left(tre_mem_t mem,

1088 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)	1054 tre_ast_node_t* node,

1089 {	1055 int tag_id) {

1090 tre_catenation_t *c;	1056 tre_catenation_t* c;

1091	1057

1092 c = tre_mem_alloc(mem, sizeof(*c));	1058 c = tre_mem_alloc(mem, sizeof(*c));

1093 if (c == NULL)	1059 if (c == NULL)

1094 return REG_ESPACE;	1060 return REG_ESPACE;

1095 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);	1061 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);

1096 if (c->left == NULL)	1062 if (c->left == NULL)

1097 return REG_ESPACE;	1063 return REG_ESPACE;

1098 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));	1064 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));

1099 if (c->right == NULL)	1065 if (c->right == NULL)

1100 return REG_ESPACE;	1066 return REG_ESPACE;

1101	1067

1102 c->right->obj = node->obj;	1068 c->right->obj = node->obj;

1103 c->right->type = node->type;	1069 c->right->type = node->type;

1104 c->right->nullable = -1;	1070 c->right->nullable = -1;

1105 c->right->submatch_id = -1;	1071 c->right->submatch_id = -1;

1106 c->right->firstpos = NULL;	1072 c->right->firstpos = NULL;

1107 c->right->lastpos = NULL;	1073 c->right->lastpos = NULL;

1108 c->right->num_tags = 0;	1074 c->right->num_tags = 0;

1109 node->obj = c;	1075 node->obj = c;

1110 node->type = CATENATION;	1076 node->type = CATENATION;

1111 return REG_OK;	1077 return REG_OK;

1112 }	1078 }

1113	1079

1114 /* Inserts a catenation node to the root of the tree given in `node'.	1080 /* Inserts a catenation node to the root of the tree given in `node'.

1115 As the right child a new tag with number `tag_id' to `node' is added,	1081 As the right child a new tag with number `tag_id' to `node' is added,

1116 and the left child is the old root. */	1082 and the left child is the old root. */

1117 static reg_errcode_t	1083 static reg_errcode_t tre_add_tag_right(tre_mem_t mem,

1118 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)	1084 tre_ast_node_t* node,

1119 {	1085 int tag_id) {

1120 tre_catenation_t *c;	1086 tre_catenation_t* c;

1121	1087

1122 c = tre_mem_alloc(mem, sizeof(*c));	1088 c = tre_mem_alloc(mem, sizeof(*c));

1123 if (c == NULL)	1089 if (c == NULL)

1124 return REG_ESPACE;	1090 return REG_ESPACE;

1125 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);	1091 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);

1126 if (c->right == NULL)	1092 if (c->right == NULL)

1127 return REG_ESPACE;	1093 return REG_ESPACE;

1128 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));	1094 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));

1129 if (c->left == NULL)	1095 if (c->left == NULL)

1130 return REG_ESPACE;	1096 return REG_ESPACE;

(...skipping 13 matching lines...) Expand all Loading...
1144 typedef enum {	1110 typedef enum {

1145 ADDTAGS_RECURSE,	1111 ADDTAGS_RECURSE,

1146 ADDTAGS_AFTER_ITERATION,	1112 ADDTAGS_AFTER_ITERATION,

1147 ADDTAGS_AFTER_UNION_LEFT,	1113 ADDTAGS_AFTER_UNION_LEFT,

1148 ADDTAGS_AFTER_UNION_RIGHT,	1114 ADDTAGS_AFTER_UNION_RIGHT,

1149 ADDTAGS_AFTER_CAT_LEFT,	1115 ADDTAGS_AFTER_CAT_LEFT,

1150 ADDTAGS_AFTER_CAT_RIGHT,	1116 ADDTAGS_AFTER_CAT_RIGHT,

1151 ADDTAGS_SET_SUBMATCH_END	1117 ADDTAGS_SET_SUBMATCH_END

1152 } tre_addtags_symbol_t;	1118 } tre_addtags_symbol_t;

1153	1119

1154

1155 typedef struct {	1120 typedef struct {

1156 int tag;	1121 int tag;

1157 int next_tag;	1122 int next_tag;

1158 } tre_tag_states_t;	1123 } tre_tag_states_t;

1159	1124

1160

1161 /* Go through `regset' and set submatch data for submatches that are	1125 /* Go through `regset' and set submatch data for submatches that are

1162 using this tag. */	1126 using this tag. */

1163 static void	1127 static void tre_purge_regset(int* regset, tre_tnfa_t* tnfa, int tag) {

1164 tre_purge_regset(int regset, tre_tnfa_t tnfa, int tag)

1165 {

1166 int i;	1128 int i;

1167	1129

1168 for (i = 0; regset[i] >= 0; i++)	1130 for (i = 0; regset[i] >= 0; i++) {

1169 {	1131 int id = regset[i] / 2;

1170 int id = regset[i] / 2;	1132 int start = !(regset[i] % 2);

1171 int start = !(regset[i] % 2);	1133 if (start)

1172 if (start)	1134 tnfa->submatch_data[id].so_tag = tag;

1173 » tnfa->submatch_data[id].so_tag = tag;	1135 else

1174 else	1136 tnfa->submatch_data[id].eo_tag = tag;

1175 » tnfa->submatch_data[id].eo_tag = tag;	1137 }

1176 }

1177 regset[0] = -1;	1138 regset[0] = -1;

1178 }	1139 }

1179	1140

1180

1181 /* Adds tags to appropriate locations in the parse tree in `tree', so that	1141 /* Adds tags to appropriate locations in the parse tree in `tree', so that

1182 subexpressions marked for submatch addressing can be traced. */	1142 subexpressions marked for submatch addressing can be traced. */

1183 static reg_errcode_t	1143 static reg_errcode_t tre_add_tags(tre_mem_t mem,

1184 tre_add_tags(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree,	1144 tre_stack_t* stack,

1185 » tre_tnfa_t *tnfa)	1145 tre_ast_node_t* tree,

1186 {	1146 tre_tnfa_t* tnfa) {

1187 reg_errcode_t status = REG_OK;	1147 reg_errcode_t status = REG_OK;

1188 tre_addtags_symbol_t symbol;	1148 tre_addtags_symbol_t symbol;

1189 tre_ast_node_t node = tree; / Tree node we are currently looking at. */	1149 tre_ast_node_t* node = tree; /* Tree node we are currently looking at. */

1190 int bottom = tre_stack_num_objects(stack);	1150 int bottom = tre_stack_num_objects(stack);

1191 /* True for first pass (counting number of needed tags) */	1151 /* True for first pass (counting number of needed tags) */

1192 int first_pass = (mem == NULL \|\| tnfa == NULL);	1152 int first_pass = (mem == NULL \|\| tnfa == NULL);

1193 int regset, orig_regset;	1153 int regset, orig_regset;

1194 int num_tags = 0; /* Total number of tags. */	1154 int num_tags = 0; /* Total number of tags. */

1195 int num_minimals = 0;» /* Number of special minimal tags. */	1155 int num_minimals = 0; /* Number of special minimal tags. */

1196 int tag = 0;» /* The tag that is to be added next. */	1156 int tag = 0; /* The tag that is to be added next. */

1197 int next_tag = 1; /* Next tag to use after this one. */	1157 int next_tag = 1; /* Next tag to use after this one. */

1198 int parents;» / Stack of submatches the current submatch is	1158 int* parents; /* Stack of submatches the current submatch is

1199 » » contained in. */	1159 contained in. */

1200 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */	1160 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */

1201 tre_tag_states_t *saved_states;	1161 tre_tag_states_t* saved_states;

1202	1162

1203 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;	1163 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;

1204 if (!first_pass)	1164 if (!first_pass) {

1205 {	1165 tnfa->end_tag = 0;

1206 tnfa->end_tag = 0;	1166 tnfa->minimal_tags[0] = -1;

1207 tnfa->minimal_tags[0] = -1;	1167 }

1208 }

1209	1168

1210 regset = xmalloc(sizeof(regset) ((tnfa->num_submatches + 1) * 2));	1169 regset = xmalloc(sizeof(regset) ((tnfa->num_submatches + 1) * 2));

1211 if (regset == NULL)	1170 if (regset == NULL)

1212 return REG_ESPACE;	1171 return REG_ESPACE;

1213 regset[0] = -1;	1172 regset[0] = -1;

1214 orig_regset = regset;	1173 orig_regset = regset;

1215	1174

1216 parents = xmalloc(sizeof(parents) (tnfa->num_submatches + 1));	1175 parents = xmalloc(sizeof(parents) (tnfa->num_submatches + 1));

1217 if (parents == NULL)	1176 if (parents == NULL) {

1218 {	1177 xfree(regset);

1219 xfree(regset);	1178 return REG_ESPACE;

1220 return REG_ESPACE;	1179 }

1221 }

1222 parents[0] = -1;	1180 parents[0] = -1;

1223	1181

1224 saved_states = xmalloc(sizeof(saved_states) (tnfa->num_submatches + 1));	1182 saved_states = xmalloc(sizeof(saved_states) (tnfa->num_submatches + 1));

1225 if (saved_states == NULL)	1183 if (saved_states == NULL) {

1226 {	1184 xfree(regset);

1227 xfree(regset);	1185 xfree(parents);

1228 xfree(parents);	1186 return REG_ESPACE;

1229 return REG_ESPACE;	1187 } else {

1230 }	1188 unsigned int i;

1231 else	1189 for (i = 0; i <= tnfa->num_submatches; i++)

1232 {	1190 saved_states[i].tag = -1;

1233 unsigned int i;	1191 }

1234 for (i = 0; i <= tnfa->num_submatches; i++)

1235 » saved_states[i].tag = -1;

1236 }

1237	1192

1238 STACK_PUSH(stack, voidptr, node);	1193 STACK_PUSH(stack, voidptr, node);

1239 STACK_PUSH(stack, int, ADDTAGS_RECURSE);	1194 STACK_PUSH(stack, int, ADDTAGS_RECURSE);

1240	1195

1241 while (tre_stack_num_objects(stack) > bottom)	1196 while (tre_stack_num_objects(stack) > bottom) {

1242 {	1197 if (status != REG_OK)

1243 if (status != REG_OK)	1198 break;

1244 break;	1199

1245	1200 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);

1246 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);	1201 switch (symbol) {

1247 switch (symbol)	1202 case ADDTAGS_SET_SUBMATCH_END: {

1248 {	1203 int id = tre_stack_pop_int(stack);

1249	1204 int i;

1250 case ADDTAGS_SET_SUBMATCH_END:	1205

1251 {	1206 /* Add end of this submatch to regset. */

1252 int id = tre_stack_pop_int(stack);	1207 for (i = 0; regset[i] >= 0; i++)

1253 int i;	1208 ;

1254	1209 regset[i] = id * 2 + 1;

1255 /* Add end of this submatch to regset. */	1210 regset[i + 1] = -1;

1256 for (i = 0; regset[i] >= 0; i++);	1211

1257 regset[i] = id * 2 + 1;	1212 /* Pop this submatch from the parents stack. */

1258 regset[i + 1] = -1;	1213 for (i = 0; parents[i] >= 0; i++)

1259	1214 ;

1260 /* Pop this submatch from the parents stack. */	1215 parents[i - 1] = -1;

1261 for (i = 0; parents[i] >= 0; i++);	1216 break;

1262 parents[i - 1] = -1;	1217 }

1263 break;	1218

1264 }	1219 case ADDTAGS_RECURSE:

1265	1220 node = tre_stack_pop_voidptr(stack);

1266 case ADDTAGS_RECURSE:	1221

1267 node = tre_stack_pop_voidptr(stack);	1222 if (node->submatch_id >= 0) {

1268	1223 int id = node->submatch_id;

1269 if (node->submatch_id >= 0)	1224 int i;

1270 {	1225

1271 int id = node->submatch_id;	1226 /* Add start of this submatch to regset. */

1272 int i;	1227 for (i = 0; regset[i] >= 0; i++)

1273	1228 ;

1274	1229 regset[i] = id * 2;

1275 /* Add start of this submatch to regset. */	1230 regset[i + 1] = -1;

1276 for (i = 0; regset[i] >= 0; i++);	1231

1277 regset[i] = id * 2;	1232 if (!first_pass) {

1278 regset[i + 1] = -1;	1233 for (i = 0; parents[i] >= 0; i++)

1279	1234 ;

1280 if (!first_pass)	1235 tnfa->submatch_data[id].parents = NULL;

1281 {	1236 if (i > 0) {

1282 for (i = 0; parents[i] >= 0; i++);	1237 int* p = xmalloc(sizeof(p) (i + 1));

1283 tnfa->submatch_data[id].parents = NULL;	1238 if (p == NULL) {

1284 if (i > 0)	1239 status = REG_ESPACE;

1285 {	1240 break;

1286 int p = xmalloc(sizeof(p) * (i + 1));	1241 }

1287 if (p == NULL)	1242 assert(tnfa->submatch_data[id].parents == NULL);

1288 {	1243 tnfa->submatch_data[id].parents = p;

1289 status = REG_ESPACE;	1244 for (i = 0; parents[i] >= 0; i++)

1290 break;	1245 p[i] = parents[i];

1291 }	1246 p[i] = -1;

1292 assert(tnfa->submatch_data[id].parents == NULL);	1247 }

1293 tnfa->submatch_data[id].parents = p;	1248 }

1294 for (i = 0; parents[i] >= 0; i++)	1249

1295 p[i] = parents[i];	1250 /* Add end of this submatch to regset after processing this

1296 p[i] = -1;	1251 node. */

1297 }	1252 STACK_PUSHX(stack, int, node->submatch_id);

1298 }	1253 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);

1299	1254 }

1300 /* Add end of this submatch to regset after processing this	1255

1301 node. */	1256 switch (node->type) {

1302 STACK_PUSHX(stack, int, node->submatch_id);	1257 case LITERAL: {

1303 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);	1258 tre_literal_t* lit = node->obj;

1304 }	1259

1305	1260 if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit)) {

1306 switch (node->type)	1261 int i;

1307 {	1262 if (regset[0] >= 0) {

1308 case LITERAL:	1263 /* Regset is not empty, so add a tag before the

1309 {	1264 literal or backref. */

1310 tre_literal_t *lit = node->obj;	1265 if (!first_pass) {

1311	1266 status = tre_add_tag_left(mem, node, tag);

1312 if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit))	1267 tnfa->tag_directions[tag] = direction;

1313 {	1268 if (minimal_tag >= 0) {

1314 int i;	1269 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)

1315 if (regset[0] >= 0)	1270 ;

1316 {	1271 tnfa->minimal_tags[i] = tag;

1317 /* Regset is not empty, so add a tag before the	1272 tnfa->minimal_tags[i + 1] = minimal_tag;

1318 literal or backref. */	1273 tnfa->minimal_tags[i + 2] = -1;

1319 if (!first_pass)	1274 minimal_tag = -1;

1320 {	1275 num_minimals++;

1321 status = tre_add_tag_left(mem, node, tag);	1276 }

1322 tnfa->tag_directions[tag] = direction;	1277 tre_purge_regset(regset, tnfa, tag);

1323 if (minimal_tag >= 0)	1278 } else {

1324 {	1279 node->num_tags = 1;

1325 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);	1280 }

1326 tnfa->minimal_tags[i] = tag;	1281

1327 tnfa->minimal_tags[i + 1] = minimal_tag;	1282 regset[0] = -1;

1328 tnfa->minimal_tags[i + 2] = -1;	1283 tag = next_tag;

1329 minimal_tag = -1;	1284 num_tags++;

1330 num_minimals++;	1285 next_tag++;

1331 }	1286 }

1332 tre_purge_regset(regset, tnfa, tag);	1287 } else {

1333 }	1288 assert(!IS_TAG(lit));

1334 else	1289 }

1335 {	1290 break;

1336 node->num_tags = 1;	1291 }

1337 }	1292 case CATENATION: {

1338	1293 tre_catenation_t* cat = node->obj;

1339 regset[0] = -1;	1294 tre_ast_node_t* left = cat->left;

1340 tag = next_tag;	1295 tre_ast_node_t* right = cat->right;

1341 num_tags++;	1296 int reserved_tag = -1;

1342 next_tag++;	1297

1343 }	1298 /* After processing right child. */

1344 }	1299 STACK_PUSHX(stack, voidptr, node);

1345 else	1300 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);

1346 {	1301

1347 assert(!IS_TAG(lit));	1302 /* Process right child. */

1348 }	1303 STACK_PUSHX(stack, voidptr, right);

1349 break;	1304 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

1350 }	1305

1351 case CATENATION:	1306 /* After processing left child. */

1352 {	1307 STACK_PUSHX(stack, int, next_tag + left->num_tags);

1353 tre_catenation_t *cat = node->obj;	1308 if (left->num_tags > 0 && right->num_tags > 0) {

1354 tre_ast_node_t *left = cat->left;	1309 /* Reserve the next tag to the right child. */

1355 tre_ast_node_t *right = cat->right;	1310 reserved_tag = next_tag;

1356 int reserved_tag = -1;	1311 next_tag++;

1357	1312 }

1358	1313 STACK_PUSHX(stack, int, reserved_tag);

1359 /* After processing right child. */	1314 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);

1360 STACK_PUSHX(stack, voidptr, node);	1315

1361 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);	1316 /* Process left child. */

1362	1317 STACK_PUSHX(stack, voidptr, left);

1363 /* Process right child. */	1318 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

1364 STACK_PUSHX(stack, voidptr, right);	1319

1365 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);	1320 } break;

1366	1321 case ITERATION: {

1367 /* After processing left child. */	1322 tre_iteration_t* iter = node->obj;

1368 STACK_PUSHX(stack, int, next_tag + left->num_tags);	1323

1369 if (left->num_tags > 0 && right->num_tags > 0)	1324 if (first_pass) {

1370 {	1325 STACK_PUSHX(stack, int, regset[0] >= 0 \|\| iter->minimal);

1371 /* Reserve the next tag to the right child. */	1326 } else {

1372 reserved_tag = next_tag;	1327 STACK_PUSHX(stack, int, tag);

1373 next_tag++;	1328 STACK_PUSHX(stack, int, iter->minimal);

1374 }	1329 }

1375 STACK_PUSHX(stack, int, reserved_tag);	1330 STACK_PUSHX(stack, voidptr, node);

1376 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);	1331 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);

1377	1332

1378 /* Process left child. */	1333 STACK_PUSHX(stack, voidptr, iter->arg);

1379 STACK_PUSHX(stack, voidptr, left);	1334 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

1380 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);	1335

1381	1336 /* Regset is not empty, so add a tag here. */

1382 }	1337 if (regset[0] >= 0 \|\| iter->minimal) {

1383 break;	1338 if (!first_pass) {

1384 case ITERATION:	1339 int i;

1385 {	1340 status = tre_add_tag_left(mem, node, tag);

1386 tre_iteration_t *iter = node->obj;	1341 if (iter->minimal)

1387	1342 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;

1388 if (first_pass)	1343 else

1389 {	1344 tnfa->tag_directions[tag] = direction;

1390 STACK_PUSHX(stack, int, regset[0] >= 0 \|\| iter->minimal);	1345 if (minimal_tag >= 0) {

1391 }	1346 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)

1392 else	1347 ;

1393 {	1348 tnfa->minimal_tags[i] = tag;

1394 STACK_PUSHX(stack, int, tag);	1349 tnfa->minimal_tags[i + 1] = minimal_tag;

1395 STACK_PUSHX(stack, int, iter->minimal);	1350 tnfa->minimal_tags[i + 2] = -1;

1396 }	1351 minimal_tag = -1;

1397 STACK_PUSHX(stack, voidptr, node);	1352 num_minimals++;

1398 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);	1353 }

1399	1354 tre_purge_regset(regset, tnfa, tag);

1400 STACK_PUSHX(stack, voidptr, iter->arg);	1355 }

1401 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);	1356

1402	1357 regset[0] = -1;

1403 /* Regset is not empty, so add a tag here. */	1358 tag = next_tag;

1404 if (regset[0] >= 0 \|\| iter->minimal)	1359 num_tags++;

1405 {	1360 next_tag++;

1406 if (!first_pass)	1361 }

1407 {	1362 direction = TRE_TAG_MINIMIZE;

1408 int i;	1363 } break;

1409 status = tre_add_tag_left(mem, node, tag);	1364 case UNION: {

1410 if (iter->minimal)	1365 tre_union_t* uni = node->obj;

1411 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;	1366 tre_ast_node_t* left = uni->left;

1412 else	1367 tre_ast_node_t* right = uni->right;

1413 tnfa->tag_directions[tag] = direction;	1368 int left_tag;

1414 if (minimal_tag >= 0)	1369 int right_tag;

1415 {	1370

1416 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);	1371 if (regset[0] >= 0) {

1417 tnfa->minimal_tags[i] = tag;	1372 left_tag = next_tag;

1418 tnfa->minimal_tags[i + 1] = minimal_tag;	1373 right_tag = next_tag + 1;

1419 tnfa->minimal_tags[i + 2] = -1;	1374 } else {

1420 minimal_tag = -1;	1375 left_tag = tag;

1421 num_minimals++;	1376 right_tag = next_tag;

1422 }	1377 }

1423 tre_purge_regset(regset, tnfa, tag);	1378

1424 }	1379 /* After processing right child. */

1425	1380 STACK_PUSHX(stack, int, right_tag);

1426 regset[0] = -1;	1381 STACK_PUSHX(stack, int, left_tag);

1427 tag = next_tag;	1382 STACK_PUSHX(stack, voidptr, regset);

1428 num_tags++;	1383 STACK_PUSHX(stack, int, regset[0] >= 0);

1429 next_tag++;	1384 STACK_PUSHX(stack, voidptr, node);

1430 }	1385 STACK_PUSHX(stack, voidptr, right);

1431 direction = TRE_TAG_MINIMIZE;	1386 STACK_PUSHX(stack, voidptr, left);

1432 }	1387 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);

1433 break;	1388

1434 case UNION:	1389 /* Process right child. */

1435 {	1390 STACK_PUSHX(stack, voidptr, right);

1436 tre_union_t *uni = node->obj;	1391 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

1437 tre_ast_node_t *left = uni->left;	1392

1438 tre_ast_node_t *right = uni->right;	1393 /* After processing left child. */

1439 int left_tag;	1394 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);

1440 int right_tag;	1395

1441	1396 /* Process left child. */

1442 if (regset[0] >= 0)	1397 STACK_PUSHX(stack, voidptr, left);

1443 {	1398 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);

1444 left_tag = next_tag;	1399

1445 right_tag = next_tag + 1;	1400 /* Regset is not empty, so add a tag here. */

1446 }	1401 if (regset[0] >= 0) {

1447 else	1402 if (!first_pass) {

1448 {	1403 int i;

1449 left_tag = tag;	1404 status = tre_add_tag_left(mem, node, tag);

1450 right_tag = next_tag;	1405 tnfa->tag_directions[tag] = direction;

1451 }	1406 if (minimal_tag >= 0) {

1452	1407 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)

1453 /* After processing right child. */	1408 ;

1454 STACK_PUSHX(stack, int, right_tag);	1409 tnfa->minimal_tags[i] = tag;

1455 STACK_PUSHX(stack, int, left_tag);	1410 tnfa->minimal_tags[i + 1] = minimal_tag;

1456 STACK_PUSHX(stack, voidptr, regset);	1411 tnfa->minimal_tags[i + 2] = -1;

1457 STACK_PUSHX(stack, int, regset[0] >= 0);	1412 minimal_tag = -1;

1458 STACK_PUSHX(stack, voidptr, node);	1413 num_minimals++;

1459 STACK_PUSHX(stack, voidptr, right);	1414 }

1460 STACK_PUSHX(stack, voidptr, left);	1415 tre_purge_regset(regset, tnfa, tag);

1461 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);	1416 }

1462	1417

1463 /* Process right child. */	1418 regset[0] = -1;

1464 STACK_PUSHX(stack, voidptr, right);	1419 tag = next_tag;

1465 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);	1420 num_tags++;

1466	1421 next_tag++;

1467 /* After processing left child. */	1422 }

1468 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);	1423

1469	1424 if (node->num_submatches > 0) {

1470 /* Process left child. */	1425 /* The next two tags are reserved for markers. */

1471 STACK_PUSHX(stack, voidptr, left);	1426 next_tag++;

1472 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);	1427 tag = next_tag;

1473	1428 next_tag++;

1474 /* Regset is not empty, so add a tag here. */	1429 }

1475 if (regset[0] >= 0)	1430

1476 {	1431 break;

1477 if (!first_pass)	1432 }

1478 {	1433 }

1479 int i;	1434

1480 status = tre_add_tag_left(mem, node, tag);	1435 if (node->submatch_id >= 0) {

1481 tnfa->tag_directions[tag] = direction;	1436 int i;

1482 if (minimal_tag >= 0)	1437 /* Push this submatch on the parents stack. */

1483 {	1438 for (i = 0; parents[i] >= 0; i++)

1484 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);	1439 ;

1485 tnfa->minimal_tags[i] = tag;	1440 parents[i] = node->submatch_id;

1486 tnfa->minimal_tags[i + 1] = minimal_tag;	1441 parents[i + 1] = -1;

1487 tnfa->minimal_tags[i + 2] = -1;	1442 }

1488 minimal_tag = -1;	1443

1489 num_minimals++;	1444 break; /* end case: ADDTAGS_RECURSE */

1490 }	1445

1491 tre_purge_regset(regset, tnfa, tag);	1446 case ADDTAGS_AFTER_ITERATION: {

1492 }	1447 int minimal = 0;

1493	1448 int enter_tag;

1494 regset[0] = -1;	1449 node = tre_stack_pop_voidptr(stack);

1495 tag = next_tag;	1450 if (first_pass) {

1496 num_tags++;	1451 node->num_tags = ((tre_iteration_t*)node->obj)->arg->num_tags +

1497 next_tag++;	1452 tre_stack_pop_int(stack);

1498 }	1453 minimal_tag = -1;

1499	1454 } else {

1500 if (node->num_submatches > 0)	1455 minimal = tre_stack_pop_int(stack);

1501 {	1456 enter_tag = tre_stack_pop_int(stack);

1502 /* The next two tags are reserved for markers. */	1457 if (minimal)

1503 next_tag++;	1458 minimal_tag = enter_tag;

1504 tag = next_tag;	1459 }

1505 next_tag++;	1460

1506 }	1461 if (!first_pass) {

1507	1462 if (minimal)

1508 break;	1463 direction = TRE_TAG_MINIMIZE;

1509 }	1464 else

1510 }	1465 direction = TRE_TAG_MAXIMIZE;

1511	1466 }

1512 if (node->submatch_id >= 0)	1467 break;

1513 {	1468 }

1514 int i;	1469

1515 /* Push this submatch on the parents stack. */	1470 case ADDTAGS_AFTER_CAT_LEFT: {

1516 for (i = 0; parents[i] >= 0; i++);	1471 int new_tag = tre_stack_pop_int(stack);

1517 parents[i] = node->submatch_id;	1472 next_tag = tre_stack_pop_int(stack);

1518 parents[i + 1] = -1;	1473 if (new_tag >= 0) {

1519 }	1474 tag = new_tag;

1520	1475 }

1521 break; /* end case: ADDTAGS_RECURSE */	1476 break;

1522	1477 }

1523 case ADDTAGS_AFTER_ITERATION:	1478

1524 {	1479 case ADDTAGS_AFTER_CAT_RIGHT:

1525 int minimal = 0;	1480 node = tre_stack_pop_voidptr(stack);

1526 int enter_tag;	1481 if (first_pass)

1527 node = tre_stack_pop_voidptr(stack);	1482 node->num_tags = ((tre_catenation_t*)node->obj)->left->num_tags +

1528 if (first_pass)	1483 ((tre_catenation_t*)node->obj)->right->num_tags;

1529 {	1484 break;

1530 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags	1485

1531 + tre_stack_pop_int(stack);	1486 case ADDTAGS_AFTER_UNION_LEFT:

1532 minimal_tag = -1;	1487 /* Lift the bottom of the `regset' array so that when processing

1533 }	1488 the right operand the items currently in the array are

1534 else	1489 invisible. The original bottom was saved at ADDTAGS_UNION and

1535 {	1490 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */

1536 minimal = tre_stack_pop_int(stack);	1491 while (*regset >= 0)

1537 enter_tag = tre_stack_pop_int(stack);	1492 regset++;

1538 if (minimal)	1493 break;

1539 minimal_tag = enter_tag;	1494

1540 }	1495 case ADDTAGS_AFTER_UNION_RIGHT: {

1541	1496 int added_tags, tag_left, tag_right;

1542 if (!first_pass)	1497 tre_ast_node_t* left = tre_stack_pop_voidptr(stack);

1543 {	1498 tre_ast_node_t* right = tre_stack_pop_voidptr(stack);

1544 if (minimal)	1499 node = tre_stack_pop_voidptr(stack);

1545 direction = TRE_TAG_MINIMIZE;	1500 added_tags = tre_stack_pop_int(stack);

1546 else	1501 if (first_pass) {

1547 direction = TRE_TAG_MAXIMIZE;	1502 node->num_tags = ((tre_union_t*)node->obj)->left->num_tags +

1548 }	1503 ((tre_union_t*)node->obj)->right->num_tags +

1549 break;	1504 added_tags + ((node->num_submatches > 0) ? 2 : 0);

1550 }	1505 }

1551	1506 regset = tre_stack_pop_voidptr(stack);

1552 case ADDTAGS_AFTER_CAT_LEFT:	1507 tag_left = tre_stack_pop_int(stack);

1553 {	1508 tag_right = tre_stack_pop_int(stack);

1554 int new_tag = tre_stack_pop_int(stack);	1509

1555 next_tag = tre_stack_pop_int(stack);	1510 /* Add tags after both children, the left child gets a smaller

1556 if (new_tag >= 0)	1511 tag than the right child. This guarantees that we prefer

1557 {	1512 the left child over the right child. */

1558 tag = new_tag;	1513 /* XXX - This is not always necessary (if the children have

1559 }	1514 tags which must be seen for every match of that child). */

1560 break;	1515 /* XXX - Check if this is the only place where tre_add_tag_right

1561 }	1516 is used. If so, use tre_add_tag_left (putting the tag before

1562	1517 the child as opposed after the child) and throw away

1563 case ADDTAGS_AFTER_CAT_RIGHT:	1518 tre_add_tag_right. */

1564 node = tre_stack_pop_voidptr(stack);	1519 if (node->num_submatches > 0) {

1565 if (first_pass)	1520 if (!first_pass) {

1566 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags	1521 status = tre_add_tag_right(mem, left, tag_left);

1567 + ((tre_catenation_t *)node->obj)->right->num_tags;	1522 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;

1568 break;	1523 if (status == REG_OK)

1569	1524 status = tre_add_tag_right(mem, right, tag_right);

1570 case ADDTAGS_AFTER_UNION_LEFT:	1525 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;

1571 /* Lift the bottom of the `regset' array so that when processing	1526 }

1572 the right operand the items currently in the array are	1527 num_tags += 2;

1573 invisible. The original bottom was saved at ADDTAGS_UNION and	1528 }

1574 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */	1529 direction = TRE_TAG_MAXIMIZE;

1575 while (*regset >= 0)	1530 break;

1576 regset++;	1531 }

1577 break;	1532

1578	1533 default:

1579 case ADDTAGS_AFTER_UNION_RIGHT:	1534 assert(0);

1580 {	1535 break;

1581 int added_tags, tag_left, tag_right;	1536

1582 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);	1537 } /* end switch(symbol) */

1583 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);	1538 } /* end while(tre_stack_num_objects(stack) > bottom) */

1584 node = tre_stack_pop_voidptr(stack);

1585 added_tags = tre_stack_pop_int(stack);

1586 if (first_pass)

1587 {

1588 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags

1589 + ((tre_union_t *)node->obj)->right->num_tags + added_tags

1590 + ((node->num_submatches > 0) ? 2 : 0);

1591 }

1592 regset = tre_stack_pop_voidptr(stack);

1593 tag_left = tre_stack_pop_int(stack);

1594 tag_right = tre_stack_pop_int(stack);

1595

1596 /* Add tags after both children, the left child gets a smaller

1597 tag than the right child. This guarantees that we prefer

1598 the left child over the right child. */

1599 /* XXX - This is not always necessary (if the children have

1600 tags which must be seen for every match of that child). */

1601 /* XXX - Check if this is the only place where tre_add_tag_right

1602 is used. If so, use tre_add_tag_left (putting the tag before

1603 the child as opposed after the child) and throw away

1604 tre_add_tag_right. */

1605 if (node->num_submatches > 0)

1606 {

1607 if (!first_pass)

1608 {

1609 status = tre_add_tag_right(mem, left, tag_left);

1610 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;

1611 if (status == REG_OK)

1612 status = tre_add_tag_right(mem, right, tag_right);

1613 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;

1614 }

1615 num_tags += 2;

1616 }

1617 direction = TRE_TAG_MAXIMIZE;

1618 break;

1619 }

1620

1621 default:

1622 assert(0);

1623 break;

1624

1625 } /* end switch(symbol) */

1626 } /* end while(tre_stack_num_objects(stack) > bottom) */

1627	1539

1628 if (!first_pass)	1540 if (!first_pass)

1629 tre_purge_regset(regset, tnfa, tag);	1541 tre_purge_regset(regset, tnfa, tag);

1630	1542

1631 if (!first_pass && minimal_tag >= 0)	1543 if (!first_pass && minimal_tag >= 0) {

1632 {	1544 int i;

1633 int i;	1545 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)

1634 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);	1546 ;

1635 tnfa->minimal_tags[i] = tag;	1547 tnfa->minimal_tags[i] = tag;

1636 tnfa->minimal_tags[i + 1] = minimal_tag;	1548 tnfa->minimal_tags[i + 1] = minimal_tag;

1637 tnfa->minimal_tags[i + 2] = -1;	1549 tnfa->minimal_tags[i + 2] = -1;

1638 minimal_tag = -1;	1550 minimal_tag = -1;

1639 num_minimals++;	1551 num_minimals++;

1640 }	1552 }

1641	1553

1642 assert(tree->num_tags == num_tags);	1554 assert(tree->num_tags == num_tags);

1643 tnfa->end_tag = num_tags;	1555 tnfa->end_tag = num_tags;

1644 tnfa->num_tags = num_tags;	1556 tnfa->num_tags = num_tags;

1645 tnfa->num_minimals = num_minimals;	1557 tnfa->num_minimals = num_minimals;

1646 xfree(orig_regset);	1558 xfree(orig_regset);

1647 xfree(parents);	1559 xfree(parents);

1648 xfree(saved_states);	1560 xfree(saved_states);

1649 return status;	1561 return status;

1650 }	1562 }

1651	1563

1652

1653

1654 /*	1564 /*

1655 AST to TNFA compilation routines.	1565 AST to TNFA compilation routines.

1656 */	1566 */

1657	1567

1658 typedef enum {	1568 typedef enum { COPY_RECURSE, COPY_SET_RESULT_PTR } tre_copyast_symbol_t;

1659 COPY_RECURSE,

1660 COPY_SET_RESULT_PTR

1661 } tre_copyast_symbol_t;

1662	1569

1663 /* Flags for tre_copy_ast(). */	1570 /* Flags for tre_copy_ast(). */

1664 #define COPY_REMOVE_TAGS» 1	1571 #define COPY_REMOVE_TAGS 1

1665 #define COPY_MAXIMIZE_FIRST_TAG» 2	1572 #define COPY_MAXIMIZE_FIRST_TAG 2

1666	1573

1667 static reg_errcode_t	1574 static reg_errcode_t tre_copy_ast(tre_mem_t mem,

1668 tre_copy_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,	1575 tre_stack_t* stack,

1669 » int flags, int pos_add, tre_tag_direction_t tag_directions,	1576 tre_ast_node_t* ast,

1670 » tre_ast_node_t *copy, int max_pos)	1577 int flags,

1671 {	1578 int* pos_add,

	1579 tre_tag_direction_t* tag_directions,

	1580 tre_ast_node_t** copy,

	1581 int* max_pos) {

1672 reg_errcode_t status = REG_OK;	1582 reg_errcode_t status = REG_OK;

1673 int bottom = tre_stack_num_objects(stack);	1583 int bottom = tre_stack_num_objects(stack);

1674 int num_copied = 0;	1584 int num_copied = 0;

1675 int first_tag = 1;	1585 int first_tag = 1;

1676 tre_ast_node_t **result = copy;	1586 tre_ast_node_t** result = copy;

1677 tre_copyast_symbol_t symbol;	1587 tre_copyast_symbol_t symbol;

1678	1588

1679 STACK_PUSH(stack, voidptr, ast);	1589 STACK_PUSH(stack, voidptr, ast);

1680 STACK_PUSH(stack, int, COPY_RECURSE);	1590 STACK_PUSH(stack, int, COPY_RECURSE);

1681	1591

1682 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)	1592 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {

1683 {	1593 tre_ast_node_t* node;

1684 tre_ast_node_t *node;	1594 if (status != REG_OK)

1685 if (status != REG_OK)	1595 break;

1686 » break;

1687	1596

1688 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);	1597 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);

1689 switch (symbol)	1598 switch (symbol) {

1690 » {	1599 case COPY_SET_RESULT_PTR:

1691 » case COPY_SET_RESULT_PTR:	1600 result = tre_stack_pop_voidptr(stack);

1692 » result = tre_stack_pop_voidptr(stack);	1601 break;

1693 » break;	1602 case COPY_RECURSE:

1694 » case COPY_RECURSE:	1603 node = tre_stack_pop_voidptr(stack);

1695 » node = tre_stack_pop_voidptr(stack);	1604 switch (node->type) {

1696 » switch (node->type)	1605 case LITERAL: {

1697 » {	1606 tre_literal_t* lit = node->obj;

1698 » case LITERAL:	1607 int pos = lit->position;

1699 » {	1608 int min = lit->code_min;

1700 » » tre_literal_t *lit = node->obj;	1609 int max = lit->code_max;

1701 » » int pos = lit->position;	1610 if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit)) {

1702 » » int min = lit->code_min;	1611 /* XXX - e.g. [ab] has only one position but two

1703 » » int max = lit->code_max;	1612 nodes, so we are creating holes in the state space

1704 » » if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit))	1613 here. Not fatal, just wastes memory. */

1705 » » {	1614 pos += *pos_add;

1706 » » /* XXX - e.g. [ab] has only one position but two	1615 num_copied++;

1707 » » nodes, so we are creating holes in the state space	1616 } else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) {

1708 » » here. Not fatal, just wastes memory. */	1617 /* Change this tag to empty. */

1709 » » pos += *pos_add;	1618 min = EMPTY;

1710 » » num_copied++;	1619 max = pos = -1;

1711 » » }	1620 } else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) &&

1712 » » else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))	1621 first_tag) {

1713 » » {	1622 /* Maximize the first tag. */

1714 » » /* Change this tag to empty. */	1623 tag_directions[max] = TRE_TAG_MAXIMIZE;

1715 » » min = EMPTY;	1624 first_tag = 0;

1716 » » max = pos = -1;	1625 }

1717 » » }	1626 *result = tre_ast_new_literal(mem, min, max, pos);

1718 » » else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)	1627 if (*result == NULL)

1719 » » » && first_tag)	1628 status = REG_ESPACE;

1720 » » {	1629 else {

1721 » » /* Maximize the first tag. */	1630 tre_literal_t* p = (*result)->obj;

1722 » » tag_directions[max] = TRE_TAG_MAXIMIZE;	1631 p->class = lit->class;

1723 » » first_tag = 0;	1632 p->neg_classes = lit->neg_classes;

1724 » » }	1633 }

1725 » » *result = tre_ast_new_literal(mem, min, max, pos);

1726 » » if (*result == NULL)

1727 » » status = REG_ESPACE;

1728 » » else {

1729 » » tre_literal_t p = (result)->obj;

1730 » » p->class = lit->class;

1731 » » p->neg_classes = lit->neg_classes;

1732 » » }

1733	1634

1734 » » if (pos > *max_pos)	1635 if (pos > *max_pos)

1735 » » *max_pos = pos;	1636 *max_pos = pos;

1736 » » break;	1637 break;

1737 » }	1638 }

1738 » case UNION:	1639 case UNION: {

1739 » {	1640 tre_union_t* uni = node->obj;

1740 » » tre_union_t *uni = node->obj;	1641 tre_union_t* tmp;

1741 » » tre_union_t *tmp;	1642 *result = tre_ast_new_union(mem, uni->left, uni->right);

1742 » » *result = tre_ast_new_union(mem, uni->left, uni->right);	1643 if (*result == NULL) {

1743 » » if (*result == NULL)	1644 status = REG_ESPACE;

1744 » » {	1645 break;

1745 » » status = REG_ESPACE;	1646 }

1746 » » break;	1647 tmp = (*result)->obj;

1747 » » }	1648 result = &tmp->left;

1748 » » tmp = (*result)->obj;	1649 STACK_PUSHX(stack, voidptr, uni->right);

1749 » » result = &tmp->left;	1650 STACK_PUSHX(stack, int, COPY_RECURSE);

1750 » » STACK_PUSHX(stack, voidptr, uni->right);	1651 STACK_PUSHX(stack, voidptr, &tmp->right);

1751 » » STACK_PUSHX(stack, int, COPY_RECURSE);	1652 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);

1752 » » STACK_PUSHX(stack, voidptr, &tmp->right);	1653 STACK_PUSHX(stack, voidptr, uni->left);

1753 » » STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);	1654 STACK_PUSHX(stack, int, COPY_RECURSE);

1754 » » STACK_PUSHX(stack, voidptr, uni->left);	1655 break;

1755 » » STACK_PUSHX(stack, int, COPY_RECURSE);	1656 }

1756 » » break;	1657 case CATENATION: {

1757 » }	1658 tre_catenation_t* cat = node->obj;

1758 » case CATENATION:	1659 tre_catenation_t* tmp;

1759 » {	1660 *result = tre_ast_new_catenation(mem, cat->left, cat->right);

1760 » » tre_catenation_t *cat = node->obj;	1661 if (*result == NULL) {

1761 » » tre_catenation_t *tmp;	1662 status = REG_ESPACE;

1762 » » *result = tre_ast_new_catenation(mem, cat->left, cat->right);	1663 break;

1763 » » if (*result == NULL)	1664 }

1764 » » {	1665 tmp = (*result)->obj;

1765 » » status = REG_ESPACE;	1666 tmp->left = NULL;

1766 » » break;	1667 tmp->right = NULL;

1767 » » }	1668 result = &tmp->left;

1768 » » tmp = (*result)->obj;

1769 » » tmp->left = NULL;

1770 » » tmp->right = NULL;

1771 » » result = &tmp->left;

1772	1669

1773 » » STACK_PUSHX(stack, voidptr, cat->right);	1670 STACK_PUSHX(stack, voidptr, cat->right);

1774 » » STACK_PUSHX(stack, int, COPY_RECURSE);	1671 STACK_PUSHX(stack, int, COPY_RECURSE);

1775 » » STACK_PUSHX(stack, voidptr, &tmp->right);	1672 STACK_PUSHX(stack, voidptr, &tmp->right);

1776 » » STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);	1673 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);

1777 » » STACK_PUSHX(stack, voidptr, cat->left);	1674 STACK_PUSHX(stack, voidptr, cat->left);

1778 » » STACK_PUSHX(stack, int, COPY_RECURSE);	1675 STACK_PUSHX(stack, int, COPY_RECURSE);

1779 » » break;	1676 break;

1780 » }	1677 }

1781 » case ITERATION:	1678 case ITERATION: {

1782 » {	1679 tre_iteration_t* iter = node->obj;

1783 » » tre_iteration_t *iter = node->obj;	1680 STACK_PUSHX(stack, voidptr, iter->arg);

1784 » » STACK_PUSHX(stack, voidptr, iter->arg);	1681 STACK_PUSHX(stack, int, COPY_RECURSE);

1785 » » STACK_PUSHX(stack, int, COPY_RECURSE);	1682 *result = tre_ast_new_iter(mem, iter->arg, iter->min, iter->max,

1786 » » *result = tre_ast_new_iter(mem, iter->arg, iter->min,	1683 iter->minimal);

1787 » » » » » iter->max, iter->minimal);	1684 if (*result == NULL) {

1788 » » if (*result == NULL)	1685 status = REG_ESPACE;

1789 » » {	1686 break;

1790 » » status = REG_ESPACE;	1687 }

1791 » » break;	1688 iter = (*result)->obj;

1792 » » }	1689 result = &iter->arg;

1793 » » iter = (*result)->obj;	1690 break;

1794 » » result = &iter->arg;	1691 }

1795 » » break;	1692 default:

1796 » }	1693 assert(0);

1797 » default:	1694 break;

1798 » assert(0);	1695 }

1799 » break;	1696 break;

1800 » }

1801 » break;

1802 » }

1803 }	1697 }

	1698 }

1804 *pos_add += num_copied;	1699 *pos_add += num_copied;

1805 return status;	1700 return status;

1806 }	1701 }

1807	1702

1808 typedef enum {	1703 typedef enum { EXPAND_RECURSE, EXPAND_AFTER_ITER } tre_expand_ast_symbol_t;

1809 EXPAND_RECURSE,

1810 EXPAND_AFTER_ITER

1811 } tre_expand_ast_symbol_t;

1812	1704

1813 /* Expands each iteration node that has a finite nonzero minimum or maximum	1705 /* Expands each iteration node that has a finite nonzero minimum or maximum

1814 iteration count to a catenated sequence of copies of the node. */	1706 iteration count to a catenated sequence of copies of the node. */

1815 static reg_errcode_t	1707 static reg_errcode_t tre_expand_ast(tre_mem_t mem,

1816 tre_expand_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,	1708 tre_stack_t* stack,

1817 » int position, tre_tag_direction_t tag_directions)	1709 tre_ast_node_t* ast,

1818 {	1710 int* position,

	1711 tre_tag_direction_t* tag_directions) {

1819 reg_errcode_t status = REG_OK;	1712 reg_errcode_t status = REG_OK;

1820 int bottom = tre_stack_num_objects(stack);	1713 int bottom = tre_stack_num_objects(stack);

1821 int pos_add = 0;	1714 int pos_add = 0;

1822 int pos_add_total = 0;	1715 int pos_add_total = 0;

1823 int max_pos = 0;	1716 int max_pos = 0;

1824 int iter_depth = 0;	1717 int iter_depth = 0;

1825	1718

1826 STACK_PUSHR(stack, voidptr, ast);	1719 STACK_PUSHR(stack, voidptr, ast);

1827 STACK_PUSHR(stack, int, EXPAND_RECURSE);	1720 STACK_PUSHR(stack, int, EXPAND_RECURSE);

1828 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)	1721 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {

1829 {	1722 tre_ast_node_t* node;

1830 tre_ast_node_t *node;	1723 tre_expand_ast_symbol_t symbol;

1831 tre_expand_ast_symbol_t symbol;

1832	1724

1833 if (status != REG_OK)	1725 if (status != REG_OK)

1834 » break;	1726 break;

1835	1727

1836 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);	1728 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);

1837 node = tre_stack_pop_voidptr(stack);	1729 node = tre_stack_pop_voidptr(stack);

1838 switch (symbol)	1730 switch (symbol) {

1839 » {	1731 case EXPAND_RECURSE:

1840 » case EXPAND_RECURSE:	1732 switch (node->type) {

1841 » switch (node->type)	1733 case LITERAL: {

1842 » {	1734 tre_literal_t* lit = node->obj;

1843 » case LITERAL:	1735 if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit)) {

1844 » {	1736 lit->position += pos_add;

1845 » » tre_literal_t *lit= node->obj;	1737 if (lit->position > max_pos)

1846 » » if (!IS_SPECIAL(lit) \|\| IS_BACKREF(lit))	1738 max_pos = lit->position;

1847 » » {	1739 }

1848 » » lit->position += pos_add;	1740 break;

1849 » » if (lit->position > max_pos)	1741 }

1850 » » max_pos = lit->position;	1742 case UNION: {

1851 » » }	1743 tre_union_t* uni = node->obj;

1852 » » break;	1744 STACK_PUSHX(stack, voidptr, uni->right);

1853 » }	1745 STACK_PUSHX(stack, int, EXPAND_RECURSE);

1854 » case UNION:	1746 STACK_PUSHX(stack, voidptr, uni->left);

1855 » {	1747 STACK_PUSHX(stack, int, EXPAND_RECURSE);

1856 » » tre_union_t *uni = node->obj;	1748 break;

1857 » » STACK_PUSHX(stack, voidptr, uni->right);	1749 }

1858 » » STACK_PUSHX(stack, int, EXPAND_RECURSE);	1750 case CATENATION: {

1859 » » STACK_PUSHX(stack, voidptr, uni->left);	1751 tre_catenation_t* cat = node->obj;

1860 » » STACK_PUSHX(stack, int, EXPAND_RECURSE);	1752 STACK_PUSHX(stack, voidptr, cat->right);

1861 » » break;	1753 STACK_PUSHX(stack, int, EXPAND_RECURSE);

1862 » }	1754 STACK_PUSHX(stack, voidptr, cat->left);

1863 » case CATENATION:	1755 STACK_PUSHX(stack, int, EXPAND_RECURSE);

1864 » {	1756 break;

1865 » » tre_catenation_t *cat = node->obj;	1757 }

1866 » » STACK_PUSHX(stack, voidptr, cat->right);	1758 case ITERATION: {

1867 » » STACK_PUSHX(stack, int, EXPAND_RECURSE);	1759 tre_iteration_t* iter = node->obj;

1868 » » STACK_PUSHX(stack, voidptr, cat->left);	1760 STACK_PUSHX(stack, int, pos_add);

1869 » » STACK_PUSHX(stack, int, EXPAND_RECURSE);	1761 STACK_PUSHX(stack, voidptr, node);

1870 » » break;	1762 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);

1871 » }	1763 STACK_PUSHX(stack, voidptr, iter->arg);

1872 » case ITERATION:	1764 STACK_PUSHX(stack, int, EXPAND_RECURSE);

1873 » {	1765 /* If we are going to expand this node at EXPAND_AFTER_ITER

1874 » » tre_iteration_t *iter = node->obj;	1766 then don't increase the `pos' fields of the nodes now, it

1875 » » STACK_PUSHX(stack, int, pos_add);	1767 will get done when expanding. */

1876 » » STACK_PUSHX(stack, voidptr, node);	1768 if (iter->min > 1 \|\| iter->max > 1)

1877 » » STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);	1769 pos_add = 0;

1878 » » STACK_PUSHX(stack, voidptr, iter->arg);	1770 iter_depth++;

1879 » » STACK_PUSHX(stack, int, EXPAND_RECURSE);	1771 break;

1880 » » /* If we are going to expand this node at EXPAND_AFTER_ITER	1772 }

1881 » » then don't increase the `pos' fields of the nodes now, it	1773 default:

1882 » » will get done when expanding. */	1774 assert(0);

1883 » » if (iter->min > 1 \|\| iter->max > 1)	1775 break;

1884 » » pos_add = 0;	1776 }

1885 » » iter_depth++;	1777 break;

1886 » » break;	1778 case EXPAND_AFTER_ITER: {

1887 » }	1779 tre_iteration_t* iter = node->obj;

1888 » default:	1780 int pos_add_last;

1889 » assert(0);	1781 pos_add = tre_stack_pop_int(stack);

1890 » break;	1782 pos_add_last = pos_add;

1891 » }	1783 if (iter->min > 1 \|\| iter->max > 1) {

1892 » break;	1784 tre_ast_node_t seq1 = NULL, seq2 = NULL;

1893 » case EXPAND_AFTER_ITER:	1785 int j;

1894 » {	1786 int pos_add_save = pos_add;

1895 » tre_iteration_t *iter = node->obj;

1896 » int pos_add_last;

1897 » pos_add = tre_stack_pop_int(stack);

1898 » pos_add_last = pos_add;

1899 » if (iter->min > 1 \|\| iter->max > 1)

1900 » {

1901 » » tre_ast_node_t seq1 = NULL, seq2 = NULL;

1902 » » int j;

1903 » » int pos_add_save = pos_add;

1904	1787

1905 » » /* Create a catenated sequence of copies of the node. */	1788 /* Create a catenated sequence of copies of the node. */

1906 » » for (j = 0; j < iter->min; j++)	1789 for (j = 0; j < iter->min; j++) {

1907 » » {	1790 tre_ast_node_t* copy;

1908 » » tre_ast_node_t *copy;	1791 /* Remove tags from all but the last copy. */

1909 » » /* Remove tags from all but the last copy. */	1792 int flags = ((j + 1 < iter->min) ? COPY_REMOVE_TAGS

1910 » » int flags = ((j + 1 < iter->min)	1793 : COPY_MAXIMIZE_FIRST_TAG);

1911 » » » » ? COPY_REMOVE_TAGS	1794 pos_add_save = pos_add;

1912 » » » » : COPY_MAXIMIZE_FIRST_TAG);	1795 status = tre_copy_ast(mem, stack, iter->arg, flags, &pos_add,

1913 » » pos_add_save = pos_add;	1796 tag_directions, &copy, &max_pos);

1914 » » status = tre_copy_ast(mem, stack, iter->arg, flags,	1797 if (status != REG_OK)

1915 » » » » » &pos_add, tag_directions, &copy,	1798 return status;

1916 » » » » » &max_pos);	1799 if (seq1 != NULL)

1917 » » if (status != REG_OK)	1800 seq1 = tre_ast_new_catenation(mem, seq1, copy);

1918 » » return status;	1801 else

1919 » » if (seq1 != NULL)	1802 seq1 = copy;

1920 » » seq1 = tre_ast_new_catenation(mem, seq1, copy);	1803 if (seq1 == NULL)

1921 » » else	1804 return REG_ESPACE;

1922 » » seq1 = copy;	1805 }

1923 » » if (seq1 == NULL)

1924 » » return REG_ESPACE;

1925 » » }

1926	1806

1927 » » if (iter->max == -1)	1807 if (iter->max == -1) {

1928 » » {	1808 /* No upper limit. */

1929 » » /* No upper limit. */	1809 pos_add_save = pos_add;

1930 » » pos_add_save = pos_add;	1810 status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL,

1931 » » status = tre_copy_ast(mem, stack, iter->arg, 0,	1811 &seq2, &max_pos);

1932 » » » » » &pos_add, NULL, &seq2, &max_pos);	1812 if (status != REG_OK)

1933 » » if (status != REG_OK)	1813 return status;

1934 » » return status;	1814 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);

1935 » » seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);	1815 if (seq2 == NULL)

1936 » » if (seq2 == NULL)	1816 return REG_ESPACE;

1937 » » return REG_ESPACE;	1817 } else {

1938 » » }	1818 for (j = iter->min; j < iter->max; j++) {

1939 » » else	1819 tre_ast_node_t tmp, copy;

1940 » » {	1820 pos_add_save = pos_add;

1941 » » for (j = iter->min; j < iter->max; j++)	1821 status = tre_copy_ast(mem, stack, iter->arg, 0, &pos_add, NULL,

1942 » » {	1822 &copy, &max_pos);

1943 » » » tre_ast_node_t tmp, copy;	1823 if (status != REG_OK)

1944 » » » pos_add_save = pos_add;	1824 return status;

1945 » » » status = tre_copy_ast(mem, stack, iter->arg, 0,	1825 if (seq2 != NULL)

1946 » » » » » &pos_add, NULL, &copy, &max_pos);	1826 seq2 = tre_ast_new_catenation(mem, copy, seq2);

1947 » » » if (status != REG_OK)	1827 else

1948 » » » return status;	1828 seq2 = copy;

1949 » » » if (seq2 != NULL)	1829 if (seq2 == NULL)

1950 » » » seq2 = tre_ast_new_catenation(mem, copy, seq2);	1830 return REG_ESPACE;

1951 » » » else	1831 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);

1952 » » » seq2 = copy;	1832 if (tmp == NULL)

1953 » » » if (seq2 == NULL)	1833 return REG_ESPACE;

1954 » » » return REG_ESPACE;	1834 seq2 = tre_ast_new_union(mem, tmp, seq2);

1955 » » » tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);	1835 if (seq2 == NULL)

1956 » » » if (tmp == NULL)	1836 return REG_ESPACE;

1957 » » » return REG_ESPACE;	1837 }

1958 » » » seq2 = tre_ast_new_union(mem, tmp, seq2);	1838 }

1959 » » » if (seq2 == NULL)

1960 » » » return REG_ESPACE;

1961 » » }

1962 » » }

1963	1839

1964 » » pos_add = pos_add_save;	1840 pos_add = pos_add_save;

1965 » » if (seq1 == NULL)	1841 if (seq1 == NULL)

1966 » » seq1 = seq2;	1842 seq1 = seq2;

1967 » » else if (seq2 != NULL)	1843 else if (seq2 != NULL)

1968 » » seq1 = tre_ast_new_catenation(mem, seq1, seq2);	1844 seq1 = tre_ast_new_catenation(mem, seq1, seq2);

1969 » » if (seq1 == NULL)	1845 if (seq1 == NULL)

1970 » » return REG_ESPACE;	1846 return REG_ESPACE;

1971 » » node->obj = seq1->obj;	1847 node->obj = seq1->obj;

1972 » » node->type = seq1->type;	1848 node->type = seq1->type;

1973 » }	1849 }

1974	1850

1975 » iter_depth--;	1851 iter_depth--;

1976 » pos_add_total += pos_add - pos_add_last;	1852 pos_add_total += pos_add - pos_add_last;

1977 » if (iter_depth == 0)	1853 if (iter_depth == 0)

1978 » pos_add = pos_add_total;	1854 pos_add = pos_add_total;

1979	1855

1980 » break;	1856 break;

1981 » }	1857 }

1982 » default:	1858 default:

1983 » assert(0);	1859 assert(0);

1984 » break;	1860 break;

1985 » }

1986 }	1861 }

	1862 }

1987	1863

1988 *position += pos_add_total;	1864 *position += pos_add_total;

1989	1865

1990 /* `max_pos' should never be larger than `*position' if the above	1866 /* `max_pos' should never be larger than `*position' if the above

1991 code works, but just an extra safeguard let's make sure	1867 code works, but just an extra safeguard let's make sure

1992 `*position' is set large enough so enough memory will be	1868 `*position' is set large enough so enough memory will be

1993 allocated for the transition table. */	1869 allocated for the transition table. */

1994 if (max_pos > *position)	1870 if (max_pos > *position)

1995 *position = max_pos;	1871 *position = max_pos;

1996	1872

1997 return status;	1873 return status;

1998 }	1874 }

1999	1875

2000 static tre_pos_and_tags_t *	1876 static tre_pos_and_tags_t* tre_set_empty(tre_mem_t mem) {

2001 tre_set_empty(tre_mem_t mem)	1877 tre_pos_and_tags_t* new_set;

2002 {

2003 tre_pos_and_tags_t *new_set;

2004	1878

2005 new_set = tre_mem_calloc(mem, sizeof(*new_set));	1879 new_set = tre_mem_calloc(mem, sizeof(*new_set));

2006 if (new_set == NULL)	1880 if (new_set == NULL)

2007 return NULL;	1881 return NULL;

2008	1882

2009 new_set[0].position = -1;	1883 new_set[0].position = -1;

2010 new_set[0].code_min = -1;	1884 new_set[0].code_min = -1;

2011 new_set[0].code_max = -1;	1885 new_set[0].code_max = -1;

2012	1886

2013 return new_set;	1887 return new_set;

2014 }	1888 }

2015	1889

2016 static tre_pos_and_tags_t *	1890 static tre_pos_and_tags_t* tre_set_one(tre_mem_t mem,

2017 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,	1891 int position,

2018 » tre_ctype_t class, tre_ctype_t *neg_classes, int backref)	1892 int code_min,

2019 {	1893 int code_max,

2020 tre_pos_and_tags_t *new_set;	1894 tre_ctype_t class,

	1895 tre_ctype_t* neg_classes,

	1896 int backref) {

	1897 tre_pos_and_tags_t* new_set;

2021	1898

2022 new_set = tre_mem_calloc(mem, sizeof(new_set) 2);	1899 new_set = tre_mem_calloc(mem, sizeof(new_set) 2);

2023 if (new_set == NULL)	1900 if (new_set == NULL)

2024 return NULL;	1901 return NULL;

2025	1902

2026 new_set[0].position = position;	1903 new_set[0].position = position;

2027 new_set[0].code_min = code_min;	1904 new_set[0].code_min = code_min;

2028 new_set[0].code_max = code_max;	1905 new_set[0].code_max = code_max;

2029 new_set[0].class = class;	1906 new_set[0].class = class;

2030 new_set[0].neg_classes = neg_classes;	1907 new_set[0].neg_classes = neg_classes;

2031 new_set[0].backref = backref;	1908 new_set[0].backref = backref;

2032 new_set[1].position = -1;	1909 new_set[1].position = -1;

2033 new_set[1].code_min = -1;	1910 new_set[1].code_min = -1;

2034 new_set[1].code_max = -1;	1911 new_set[1].code_max = -1;

2035	1912

2036 return new_set;	1913 return new_set;

2037 }	1914 }

2038	1915

2039 static tre_pos_and_tags_t *	1916 static tre_pos_and_tags_t* tre_set_union(tre_mem_t mem,

2040 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t set1, tre_pos_and_tags_t set2,	1917 tre_pos_and_tags_t* set1,

2041 » int *tags, int assertions)	1918 tre_pos_and_tags_t* set2,

2042 {	1919 int* tags,

	1920 int assertions) {

2043 int s1, s2, i, j;	1921 int s1, s2, i, j;

2044 tre_pos_and_tags_t *new_set;	1922 tre_pos_and_tags_t* new_set;

2045 int *new_tags;	1923 int* new_tags;

2046 int num_tags;	1924 int num_tags;

2047	1925

2048 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);	1926 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++)

2049 for (s1 = 0; set1[s1].position >= 0; s1++);	1927 ;

2050 for (s2 = 0; set2[s2].position >= 0; s2++);	1928 for (s1 = 0; set1[s1].position >= 0; s1++)

	1929 ;

	1930 for (s2 = 0; set2[s2].position >= 0; s2++)

	1931 ;

2051 new_set = tre_mem_calloc(mem, sizeof(new_set) (s1 + s2 + 1));	1932 new_set = tre_mem_calloc(mem, sizeof(new_set) (s1 + s2 + 1));

2052 if (!new_set )	1933 if (!new_set)

2053 return NULL;	1934 return NULL;

2054	1935

2055 for (s1 = 0; set1[s1].position >= 0; s1++)	1936 for (s1 = 0; set1[s1].position >= 0; s1++) {

2056 {	1937 new_set[s1].position = set1[s1].position;

2057 new_set[s1].position = set1[s1].position;	1938 new_set[s1].code_min = set1[s1].code_min;

2058 new_set[s1].code_min = set1[s1].code_min;	1939 new_set[s1].code_max = set1[s1].code_max;

2059 new_set[s1].code_max = set1[s1].code_max;	1940 new_set[s1].assertions = set1[s1].assertions \| assertions;

2060 new_set[s1].assertions = set1[s1].assertions \| assertions;	1941 new_set[s1].class = set1[s1].class;

2061 new_set[s1].class = set1[s1].class;	1942 new_set[s1].neg_classes = set1[s1].neg_classes;

2062 new_set[s1].neg_classes = set1[s1].neg_classes;	1943 new_set[s1].backref = set1[s1].backref;

2063 new_set[s1].backref = set1[s1].backref;	1944 if (set1[s1].tags == NULL && tags == NULL)

2064 if (set1[s1].tags == NULL && tags == NULL)	1945 new_set[s1].tags = NULL;

2065 » new_set[s1].tags = NULL;	1946 else {

2066 else	1947 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++)

2067 » {	1948 ;

2068 » for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);	1949 new_tags = tre_mem_alloc(mem, (sizeof(new_tags) (i + num_tags + 1)));

2069 » new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)	1950 if (new_tags == NULL)

2070 » » » » » * (i + num_tags + 1)));	1951 return NULL;

2071 » if (new_tags == NULL)	1952 for (j = 0; j < i; j++)

2072 » return NULL;	1953 new_tags[j] = set1[s1].tags[j];

2073 » for (j = 0; j < i; j++)	1954 for (i = 0; i < num_tags; i++)

2074 » new_tags[j] = set1[s1].tags[j];	1955 new_tags[j + i] = tags[i];

2075 » for (i = 0; i < num_tags; i++)	1956 new_tags[j + i] = -1;

2076 » new_tags[j + i] = tags[i];	1957 new_set[s1].tags = new_tags;

2077 » new_tags[j + i] = -1;

2078 » new_set[s1].tags = new_tags;

2079 » }

2080 }	1958 }

	1959 }

2081	1960

2082 for (s2 = 0; set2[s2].position >= 0; s2++)	1961 for (s2 = 0; set2[s2].position >= 0; s2++) {

2083 {	1962 new_set[s1 + s2].position = set2[s2].position;

2084 new_set[s1 + s2].position = set2[s2].position;	1963 new_set[s1 + s2].code_min = set2[s2].code_min;

2085 new_set[s1 + s2].code_min = set2[s2].code_min;	1964 new_set[s1 + s2].code_max = set2[s2].code_max;

2086 new_set[s1 + s2].code_max = set2[s2].code_max;	1965 /* XXX - why not \| assertions here as well? */

2087 /* XXX - why not \| assertions here as well? */	1966 new_set[s1 + s2].assertions = set2[s2].assertions;

2088 new_set[s1 + s2].assertions = set2[s2].assertions;	1967 new_set[s1 + s2].class = set2[s2].class;

2089 new_set[s1 + s2].class = set2[s2].class;	1968 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;

2090 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;	1969 new_set[s1 + s2].backref = set2[s2].backref;

2091 new_set[s1 + s2].backref = set2[s2].backref;	1970 if (set2[s2].tags == NULL)

2092 if (set2[s2].tags == NULL)	1971 new_set[s1 + s2].tags = NULL;

2093 » new_set[s1 + s2].tags = NULL;	1972 else {

2094 else	1973 for (i = 0; set2[s2].tags[i] >= 0; i++)

2095 » {	1974 ;

2096 » for (i = 0; set2[s2].tags[i] >= 0; i++);	1975 new_tags = tre_mem_alloc(mem, sizeof(new_tags) (i + 1));

2097 » new_tags = tre_mem_alloc(mem, sizeof(new_tags) (i + 1));	1976 if (new_tags == NULL)

2098 » if (new_tags == NULL)	1977 return NULL;

2099 » return NULL;	1978 for (j = 0; j < i; j++)

2100 » for (j = 0; j < i; j++)	1979 new_tags[j] = set2[s2].tags[j];

2101 » new_tags[j] = set2[s2].tags[j];	1980 new_tags[j] = -1;

2102 » new_tags[j] = -1;	1981 new_set[s1 + s2].tags = new_tags;

2103 » new_set[s1 + s2].tags = new_tags;

2104 » }

2105 }	1982 }

	1983 }

2106 new_set[s1 + s2].position = -1;	1984 new_set[s1 + s2].position = -1;

2107 return new_set;	1985 return new_set;

2108 }	1986 }

2109	1987

2110 /* Finds the empty path through `node' which is the one that should be	1988 /* Finds the empty path through `node' which is the one that should be

2111 taken according to POSIX.2 rules, and adds the tags on that path to	1989 taken according to POSIX.2 rules, and adds the tags on that path to

2112 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is	1990 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is

2113 set to the number of tags seen on the path. */	1991 set to the number of tags seen on the path. */

2114 static reg_errcode_t	1992 static reg_errcode_t tre_match_empty(tre_stack_t* stack,

2115 tre_match_empty(tre_stack_t stack, tre_ast_node_t node, int *tags,	1993 tre_ast_node_t* node,

2116 » » int assertions, int num_tags_seen)	1994 int* tags,

2117 {	1995 int* assertions,

2118 tre_literal_t *lit;	1996 int* num_tags_seen) {

2119 tre_union_t *uni;	1997 tre_literal_t* lit;

2120 tre_catenation_t *cat;	1998 tre_union_t* uni;

2121 tre_iteration_t *iter;	1999 tre_catenation_t* cat;

	2000 tre_iteration_t* iter;

2122 int i;	2001 int i;

2123 int bottom = tre_stack_num_objects(stack);	2002 int bottom = tre_stack_num_objects(stack);

2124 reg_errcode_t status = REG_OK;	2003 reg_errcode_t status = REG_OK;

2125 if (num_tags_seen)	2004 if (num_tags_seen)

2126 *num_tags_seen = 0;	2005 *num_tags_seen = 0;

2127	2006

2128 status = tre_stack_push_voidptr(stack, node);	2007 status = tre_stack_push_voidptr(stack, node);

2129	2008

2130 /* Walk through the tree recursively. */	2009 /* Walk through the tree recursively. */

2131 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)	2010 while (status == REG_OK && tre_stack_num_objects(stack) > bottom) {

2132 {	2011 node = tre_stack_pop_voidptr(stack);

2133 node = tre_stack_pop_voidptr(stack);

2134	2012

2135 switch (node->type)	2013 switch (node->type) {

2136 » {	2014 case LITERAL:

2137 » case LITERAL:	2015 lit = (tre_literal_t*)node->obj;

2138 » lit = (tre_literal_t *)node->obj;	2016 switch (lit->code_min) {

2139 » switch (lit->code_min)	2017 case TAG:

2140 » {	2018 if (lit->code_max >= 0) {

2141 » case TAG:	2019 if (tags != NULL) {

2142 » if (lit->code_max >= 0)	2020 /* Add the tag to `tags'. */

2143 » » {	2021 for (i = 0; tags[i] >= 0; i++)

2144 » » if (tags != NULL)	2022 if (tags[i] == lit->code_max)

2145 » » {	2023 break;

2146 » » /* Add the tag to `tags'. */	2024 if (tags[i] < 0) {

2147 » » for (i = 0; tags[i] >= 0; i++)	2025 tags[i] = lit->code_max;

2148 » » » if (tags[i] == lit->code_max)	2026 tags[i + 1] = -1;

2149 » » » break;	2027 }

2150 » » if (tags[i] < 0)	2028 }

2151 » » » {	2029 if (num_tags_seen)

2152 » » » tags[i] = lit->code_max;	2030 (*num_tags_seen)++;

2153 » » » tags[i + 1] = -1;	2031 }

2154 » » » }	2032 break;

2155 » » }	2033 case ASSERTION:

2156 » » if (num_tags_seen)	2034 assert(lit->code_max >= 1 \|\| lit->code_max <= ASSERT_LAST);

2157 » » (*num_tags_seen)++;	2035 if (assertions != NULL)

2158 » » }	2036 *assertions \|= lit->code_max;

2159 » break;	2037 break;

2160 » case ASSERTION:	2038 case EMPTY:

2161 » assert(lit->code_max >= 1	2039 break;

2162 » » \|\| lit->code_max <= ASSERT_LAST);	2040 default:

2163 » if (assertions != NULL)	2041 assert(0);

2164 » » *assertions \|= lit->code_max;	2042 break;

2165 » break;	2043 }

2166 » case EMPTY:	2044 break;

2167 » break;

2168 » default:

2169 » assert(0);

2170 » break;

2171 » }

2172 » break;

2173	2045

2174 » case UNION:	2046 case UNION:

2175 » /* Subexpressions starting earlier take priority over ones	2047 /* Subexpressions starting earlier take priority over ones

2176 » starting later, so we prefer the left subexpression over the	2048 starting later, so we prefer the left subexpression over the

2177 » right subexpression. */	2049 right subexpression. */

2178 » uni = (tre_union_t *)node->obj;	2050 uni = (tre_union_t*)node->obj;

2179 » if (uni->left->nullable)	2051 if (uni->left->nullable)

2180 » STACK_PUSHX(stack, voidptr, uni->left)	2052 STACK_PUSHX(stack, voidptr, uni->left)

2181 » else if (uni->right->nullable)	2053 else if (uni->right->nullable)

2182 » STACK_PUSHX(stack, voidptr, uni->right)	2054 STACK_PUSHX(stack, voidptr, uni->right)

2183 » else	2055 else

2184 » assert(0);	2056 assert(0);

2185 » break;	2057 break;

2186	2058

2187 » case CATENATION:	2059 case CATENATION:

2188 » /* The path must go through both children. */	2060 /* The path must go through both children. */

2189 » cat = (tre_catenation_t *)node->obj;	2061 cat = (tre_catenation_t*)node->obj;

2190 » assert(cat->left->nullable);	2062 assert(cat->left->nullable);

2191 » assert(cat->right->nullable);	2063 assert(cat->right->nullable);

2192 » STACK_PUSHX(stack, voidptr, cat->left);	2064 STACK_PUSHX(stack, voidptr, cat->left);

2193 » STACK_PUSHX(stack, voidptr, cat->right);	2065 STACK_PUSHX(stack, voidptr, cat->right);

2194 » break;	2066 break;

2195	2067

2196 » case ITERATION:	2068 case ITERATION:

2197 » /* A match with an empty string is preferred over no match at	2069 /* A match with an empty string is preferred over no match at

2198 » all, so we go through the argument if possible. */	2070 all, so we go through the argument if possible. */

2199 » iter = (tre_iteration_t *)node->obj;	2071 iter = (tre_iteration_t*)node->obj;

2200 » if (iter->arg->nullable)	2072 if (iter->arg->nullable)

2201 » STACK_PUSHX(stack, voidptr, iter->arg);	2073 STACK_PUSHX(stack, voidptr, iter->arg);

2202 » break;	2074 break;

2203	2075

2204 » default:	2076 default:

2205 » assert(0);	2077 assert(0);

2206 » break;	2078 break;

2207 » }

2208 }	2079 }

	2080 }

2209	2081

2210 return status;	2082 return status;

2211 }	2083 }

2212	2084

2213

2214 typedef enum {	2085 typedef enum {

2215 NFL_RECURSE,	2086 NFL_RECURSE,

2216 NFL_POST_UNION,	2087 NFL_POST_UNION,

2217 NFL_POST_CATENATION,	2088 NFL_POST_CATENATION,

2218 NFL_POST_ITERATION	2089 NFL_POST_ITERATION

2219 } tre_nfl_stack_symbol_t;	2090 } tre_nfl_stack_symbol_t;

2220	2091

2221

2222 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for	2092 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for

2223 the nodes of the AST `tree'. */	2093 the nodes of the AST `tree'. */

2224 static reg_errcode_t	2094 static reg_errcode_t tre_compute_nfl(tre_mem_t mem,

2225 tre_compute_nfl(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree)	2095 tre_stack_t* stack,

2226 {	2096 tre_ast_node_t* tree) {

2227 int bottom = tre_stack_num_objects(stack);	2097 int bottom = tre_stack_num_objects(stack);

2228	2098

2229 STACK_PUSHR(stack, voidptr, tree);	2099 STACK_PUSHR(stack, voidptr, tree);

2230 STACK_PUSHR(stack, int, NFL_RECURSE);	2100 STACK_PUSHR(stack, int, NFL_RECURSE);

2231	2101

2232 while (tre_stack_num_objects(stack) > bottom)	2102 while (tre_stack_num_objects(stack) > bottom) {

2233 {	2103 tre_nfl_stack_symbol_t symbol;

2234 tre_nfl_stack_symbol_t symbol;	2104 tre_ast_node_t* node;

2235 tre_ast_node_t *node;	2105

2236	2106 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);

2237 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);	2107 node = tre_stack_pop_voidptr(stack);

2238 node = tre_stack_pop_voidptr(stack);	2108 switch (symbol) {

2239 switch (symbol)	2109 case NFL_RECURSE:

2240 {	2110 switch (node->type) {

2241 case NFL_RECURSE:	2111 case LITERAL: {

2242 switch (node->type)	2112 tre_literal_t* lit = (tre_literal_t*)node->obj;

2243 {	2113 if (IS_BACKREF(lit)) {

2244 case LITERAL:	2114 /* Back references: nullable = false, firstpos = {i},

2245 {	2115 lastpos = {i}. */

2246 tre_literal_t lit = (tre_literal_t )node->obj;	2116 node->nullable = 0;

2247 if (IS_BACKREF(lit))	2117 node->firstpos =

2248 {	2118 tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);

2249 /* Back references: nullable = false, firstpos = {i},	2119 if (!node->firstpos)

2250 lastpos = {i}. */	2120 return REG_ESPACE;

2251 node->nullable = 0;	2121 node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX,

2252 node->firstpos = tre_set_one(mem, lit->position, 0,	2122 0, NULL, (int)lit->code_max);

2253 TRE_CHAR_MAX, 0, NULL, -1);	2123 if (!node->lastpos)

2254 if (!node->firstpos)	2124 return REG_ESPACE;

2255 return REG_ESPACE;	2125 } else if (lit->code_min < 0) {

2256 node->lastpos = tre_set_one(mem, lit->position, 0,	2126 /* Tags, empty strings, params, and zero width assertions:

2257 TRE_CHAR_MAX, 0, NULL,	2127 nullable = true, firstpos = {}, and lastpos = {}. */

2258 (int)lit->code_max);	2128 node->nullable = 1;

2259 if (!node->lastpos)	2129 node->firstpos = tre_set_empty(mem);

2260 return REG_ESPACE;	2130 if (!node->firstpos)

2261 }	2131 return REG_ESPACE;

2262 else if (lit->code_min < 0)	2132 node->lastpos = tre_set_empty(mem);

2263 {	2133 if (!node->lastpos)

2264 /* Tags, empty strings, params, and zero width assertions:	2134 return REG_ESPACE;

2265 nullable = true, firstpos = {}, and lastpos = {}. */	2135 } else {

2266 node->nullable = 1;	2136 /* Literal at position i: nullable = false, firstpos = {i},

2267 node->firstpos = tre_set_empty(mem);	2137 lastpos = {i}. */

2268 if (!node->firstpos)	2138 node->nullable = 0;

2269 return REG_ESPACE;	2139 node->firstpos =

2270 node->lastpos = tre_set_empty(mem);	2140 tre_set_one(mem, lit->position, (int)lit->code_min,

2271 if (!node->lastpos)	2141 (int)lit->code_max, 0, NULL, -1);

2272 return REG_ESPACE;	2142 if (!node->firstpos)

2273 }	2143 return REG_ESPACE;

2274 else	2144 node->lastpos = tre_set_one(

2275 {	2145 mem, lit->position, (int)lit->code_min, (int)lit->code_max,

2276 /* Literal at position i: nullable = false, firstpos = {i},	2146 lit->class, lit->neg_classes, -1);

2277 lastpos = {i}. */	2147 if (!node->lastpos)

2278 node->nullable = 0;	2148 return REG_ESPACE;

2279 node->firstpos =	2149 }

2280 tre_set_one(mem, lit->position, (int)lit->code_min,	2150 break;

2281 (int)lit->code_max, 0, NULL, -1);	2151 }

2282 if (!node->firstpos)	2152

2283 return REG_ESPACE;	2153 case UNION:

2284 node->lastpos = tre_set_one(mem, lit->position,	2154 /* Compute the attributes for the two subtrees, and after that

2285 (int)lit->code_min,	2155 for this node. */

2286 (int)lit->code_max,	2156 STACK_PUSHR(stack, voidptr, node);

2287 lit->class, lit->neg_classes,	2157 STACK_PUSHR(stack, int, NFL_POST_UNION);

2288 -1);	2158 STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->right);

2289 if (!node->lastpos)	2159 STACK_PUSHR(stack, int, NFL_RECURSE);

2290 return REG_ESPACE;	2160 STACK_PUSHR(stack, voidptr, ((tre_union_t*)node->obj)->left);

2291 }	2161 STACK_PUSHR(stack, int, NFL_RECURSE);

2292 break;	2162 break;

2293 }	2163

2294	2164 case CATENATION:

2295 case UNION:	2165 /* Compute the attributes for the two subtrees, and after that

2296 /* Compute the attributes for the two subtrees, and after that	2166 for this node. */

2297 for this node. */	2167 STACK_PUSHR(stack, voidptr, node);

2298 STACK_PUSHR(stack, voidptr, node);	2168 STACK_PUSHR(stack, int, NFL_POST_CATENATION);

2299 STACK_PUSHR(stack, int, NFL_POST_UNION);	2169 STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->right);

2300 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);	2170 STACK_PUSHR(stack, int, NFL_RECURSE);

2301 STACK_PUSHR(stack, int, NFL_RECURSE);	2171 STACK_PUSHR(stack, voidptr, ((tre_catenation_t*)node->obj)->left);

2302 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);	2172 STACK_PUSHR(stack, int, NFL_RECURSE);

2303 STACK_PUSHR(stack, int, NFL_RECURSE);	2173 break;

2304 break;	2174

2305	2175 case ITERATION:

2306 case CATENATION:	2176 /* Compute the attributes for the subtree, and after that for

2307 /* Compute the attributes for the two subtrees, and after that	2177 this node. */

2308 for this node. */	2178 STACK_PUSHR(stack, voidptr, node);

2309 STACK_PUSHR(stack, voidptr, node);	2179 STACK_PUSHR(stack, int, NFL_POST_ITERATION);

2310 STACK_PUSHR(stack, int, NFL_POST_CATENATION);	2180 STACK_PUSHR(stack, voidptr, ((tre_iteration_t*)node->obj)->arg);

2311 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right );	2181 STACK_PUSHR(stack, int, NFL_RECURSE);

2312 STACK_PUSHR(stack, int, NFL_RECURSE);	2182 break;

2313 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left) ;	2183 }

2314 STACK_PUSHR(stack, int, NFL_RECURSE);	2184 break; /* end case: NFL_RECURSE */

2315 break;	2185

2316	2186 case NFL_POST_UNION: {

2317 case ITERATION:	2187 tre_union_t* uni = (tre_union_t*)node->obj;

2318 /* Compute the attributes for the subtree, and after that for	2188 node->nullable = uni->left->nullable \|\| uni->right->nullable;

2319 this node. */	2189 node->firstpos = tre_set_union(mem, uni->left->firstpos,

2320 STACK_PUSHR(stack, voidptr, node);	2190 uni->right->firstpos, NULL, 0);

2321 STACK_PUSHR(stack, int, NFL_POST_ITERATION);	2191 if (!node->firstpos)

2322 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);	2192 return REG_ESPACE;

2323 STACK_PUSHR(stack, int, NFL_RECURSE);	2193 node->lastpos = tre_set_union(mem, uni->left->lastpos,

2324 break;	2194 uni->right->lastpos, NULL, 0);

2325 }	2195 if (!node->lastpos)

2326 break; /* end case: NFL_RECURSE */	2196 return REG_ESPACE;

2327	2197 break;

2328 case NFL_POST_UNION:	2198 }

2329 {	2199

2330 tre_union_t uni = (tre_union_t )node->obj;	2200 case NFL_POST_ITERATION: {

2331 node->nullable = uni->left->nullable \|\| uni->right->nullable;	2201 tre_iteration_t* iter = (tre_iteration_t*)node->obj;

2332 node->firstpos = tre_set_union(mem, uni->left->firstpos,	2202

2333 uni->right->firstpos, NULL, 0);	2203 if (iter->min == 0 \|\| iter->arg->nullable)

2334 if (!node->firstpos)	2204 node->nullable = 1;

2335 return REG_ESPACE;	2205 else

2336 node->lastpos = tre_set_union(mem, uni->left->lastpos,	2206 node->nullable = 0;

2337 uni->right->lastpos, NULL, 0);	2207 node->firstpos = iter->arg->firstpos;

2338 if (!node->lastpos)	2208 node->lastpos = iter->arg->lastpos;

2339 return REG_ESPACE;	2209 break;

2340 break;	2210 }

2341 }	2211

2342	2212 case NFL_POST_CATENATION: {

2343 case NFL_POST_ITERATION:	2213 int num_tags, *tags, assertions;

2344 {	2214 reg_errcode_t status;

2345 tre_iteration_t iter = (tre_iteration_t )node->obj;	2215 tre_catenation_t* cat = node->obj;

2346	2216 node->nullable = cat->left->nullable && cat->right->nullable;

2347 if (iter->min == 0 \|\| iter->arg->nullable)	2217

2348 node->nullable = 1;	2218 /* Compute firstpos. */

2349 else	2219 if (cat->left->nullable) {

2350 node->nullable = 0;	2220 /* The left side matches the empty string. Make a first pass

2351 node->firstpos = iter->arg->firstpos;	2221 with tre_match_empty() to get the number of tags and

2352 node->lastpos = iter->arg->lastpos;	2222 parameters. */

2353 break;	2223 status = tre_match_empty(stack, cat->left, NULL, NULL, &num_tags);

2354 }	2224 if (status != REG_OK)

2355	2225 return status;

2356 case NFL_POST_CATENATION:	2226 /* Allocate arrays for the tags and parameters. */

2357 {	2227 tags = xmalloc(sizeof(tags) (num_tags + 1));

2358 int num_tags, *tags, assertions;	2228 if (!tags)

2359 reg_errcode_t status;	2229 return REG_ESPACE;

2360 tre_catenation_t *cat = node->obj;	2230 tags[0] = -1;

2361 node->nullable = cat->left->nullable && cat->right->nullable;	2231 assertions = 0;

2362	2232 /* Second pass with tre_mach_empty() to get the list of

2363 /* Compute firstpos. */	2233 tags and parameters. */

2364 if (cat->left->nullable)	2234 status = tre_match_empty(stack, cat->left, tags, &assertions, NULL);

2365 {	2235 if (status != REG_OK) {

2366 /* The left side matches the empty string. Make a first pass	2236 xfree(tags);

2367 with tre_match_empty() to get the number of tags and	2237 return status;

2368 parameters. */	2238 }

2369 status = tre_match_empty(stack, cat->left,	2239 node->firstpos = tre_set_union(mem, cat->right->firstpos,

2370 NULL, NULL, &num_tags);	2240 cat->left->firstpos, tags, assertions);

2371 if (status != REG_OK)	2241 xfree(tags);

2372 return status;	2242 if (!node->firstpos)

2373 /* Allocate arrays for the tags and parameters. */	2243 return REG_ESPACE;

2374 tags = xmalloc(sizeof(tags) (num_tags + 1));	2244 } else {

2375 if (!tags)	2245 node->firstpos = cat->left->firstpos;

2376 return REG_ESPACE;	2246 }

2377 tags[0] = -1;	2247

2378 assertions = 0;	2248 /* Compute lastpos. */

2379 /* Second pass with tre_mach_empty() to get the list of	2249 if (cat->right->nullable) {

2380 tags and parameters. */	2250 /* The right side matches the empty string. Make a first pass

2381 status = tre_match_empty(stack, cat->left, tags,	2251 with tre_match_empty() to get the number of tags and

2382 &assertions, NULL);	2252 parameters. */

2383 if (status != REG_OK)	2253 status = tre_match_empty(stack, cat->right, NULL, NULL, &num_tags);

2384 {	2254 if (status != REG_OK)

2385 xfree(tags);	2255 return status;

2386 return status;	2256 /* Allocate arrays for the tags and parameters. */

2387 }	2257 tags = xmalloc(sizeof(int) * (num_tags + 1));

2388 node->firstpos =	2258 if (!tags)

2389 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,	2259 return REG_ESPACE;

2390 tags, assertions);	2260 tags[0] = -1;

2391 xfree(tags);	2261 assertions = 0;

2392 if (!node->firstpos)	2262 /* Second pass with tre_mach_empty() to get the list of

2393 return REG_ESPACE;	2263 tags and parameters. */

2394 }	2264 status = tre_match_empty(stack, cat->right, tags, &assertions, NULL);

2395 else	2265 if (status != REG_OK) {

2396 {	2266 xfree(tags);

2397 node->firstpos = cat->left->firstpos;	2267 return status;

2398 }	2268 }

2399	2269 node->lastpos = tre_set_union(mem, cat->left->lastpos,

2400 /* Compute lastpos. */	2270 cat->right->lastpos, tags, assertions);

2401 if (cat->right->nullable)	2271 xfree(tags);

2402 {	2272 if (!node->lastpos)

2403 /* The right side matches the empty string. Make a first pass	2273 return REG_ESPACE;

2404 with tre_match_empty() to get the number of tags and	2274 } else {

2405 parameters. */	2275 node->lastpos = cat->right->lastpos;

2406 status = tre_match_empty(stack, cat->right,	2276 }

2407 NULL, NULL, &num_tags);	2277 break;

2408 if (status != REG_OK)	2278 }

2409 return status;	2279

2410 /* Allocate arrays for the tags and parameters. */	2280 default:

2411 tags = xmalloc(sizeof(int) * (num_tags + 1));	2281 assert(0);

2412 if (!tags)	2282 break;

2413 return REG_ESPACE;

2414 tags[0] = -1;

2415 assertions = 0;

2416 /* Second pass with tre_mach_empty() to get the list of

2417 tags and parameters. */

2418 status = tre_match_empty(stack, cat->right, tags,

2419 &assertions, NULL);

2420 if (status != REG_OK)

2421 {

2422 xfree(tags);

2423 return status;

2424 }

2425 node->lastpos =

2426 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,

2427 tags, assertions);

2428 xfree(tags);

2429 if (!node->lastpos)

2430 return REG_ESPACE;

2431 }

2432 else

2433 {

2434 node->lastpos = cat->right->lastpos;

2435 }

2436 break;

2437 }

2438

2439 default:

2440 assert(0);

2441 break;

2442 }

2443 }	2283 }

	2284 }

2444	2285

2445 return REG_OK;	2286 return REG_OK;

2446 }	2287 }

2447	2288

2448

2449 /* Adds a transition from each position in `p1' to each position in `p2'. */	2289 /* Adds a transition from each position in `p1' to each position in `p2'. */

2450 static reg_errcode_t	2290 static reg_errcode_t tre_make_trans(tre_pos_and_tags_t* p1,

2451 tre_make_trans(tre_pos_and_tags_t p1, tre_pos_and_tags_t p2,	2291 tre_pos_and_tags_t* p2,

2452 » tre_tnfa_transition_t *transitions,	2292 tre_tnfa_transition_t* transitions,

2453 » int counts, int offs)	2293 int* counts,

2454 {	2294 int* offs) {

2455 tre_pos_and_tags_t *orig_p2 = p2;	2295 tre_pos_and_tags_t* orig_p2 = p2;

2456 tre_tnfa_transition_t *trans;	2296 tre_tnfa_transition_t* trans;

2457 int i, j, k, l, dup, prev_p2_pos;	2297 int i, j, k, l, dup, prev_p2_pos;

2458	2298

2459 if (transitions != NULL)	2299 if (transitions != NULL)

2460 while (p1->position >= 0)	2300 while (p1->position >= 0) {

2461 {	2301 p2 = orig_p2;

2462 » p2 = orig_p2;	2302 prev_p2_pos = -1;

2463 » prev_p2_pos = -1;	2303 while (p2->position >= 0) {

2464 » while (p2->position >= 0)	2304 /* Optimization: if this position was already handled, skip it. */

2465 » {	2305 if (p2->position == prev_p2_pos) {

2466 » /* Optimization: if this position was already handled, skip it. */	2306 p2++;

2467 » if (p2->position == prev_p2_pos)	2307 continue;

2468 » {	2308 }

2469 » » p2++;	2309 prev_p2_pos = p2->position;

2470 » » continue;	2310 /* Set `trans' to point to the next unused transition from

2471 » }	2311 position `p1->position'. */

2472 » prev_p2_pos = p2->position;	2312 trans = transitions + offs[p1->position];

2473 » /* Set `trans' to point to the next unused transition from	2313 while (trans->state != NULL) {

2474 » position `p1->position'. */

2475 » trans = transitions + offs[p1->position];

2476 » while (trans->state != NULL)

2477 » {

2478 #if 0	2314 #if 0

2479 /* If we find a previous transition from `p1->position' to	2315 /* If we find a previous transition from `p1->position' to

2480 `p2->position', it is overwritten. This can happen only	2316 `p2->position', it is overwritten. This can happen only

2481 if there are nested loops in the regexp, like in "((a))".	2317 if there are nested loops in the regexp, like in "((a))".

2482 In POSIX.2 repetition using the outer loop is always	2318 In POSIX.2 repetition using the outer loop is always

2483 preferred over using the inner loop. Therefore the	2319 preferred over using the inner loop. Therefore the

2484 transition for the inner loop is useless and can be thrown	2320 transition for the inner loop is useless and can be thrown

2485 away. */	2321 away. */

2486 /* XXX - The same position is used for all nodes in a bracket	2322 /* XXX - The same position is used for all nodes in a bracket

2487 expression, so this optimization cannot be used (it will	2323 expression, so this optimization cannot be used (it will

2488 break bracket expressions) unless I figure out a way to	2324 break bracket expressions) unless I figure out a way to

2489 detect it here. */	2325 detect it here. */

2490 if (trans->state_id == p2->position)	2326 if (trans->state_id == p2->position)

2491 {	2327 {

2492 break;	2328 break;

2493 }	2329 }

2494 #endif	2330 #endif

2495 » » trans++;	2331 trans++;

2496 » }	2332 }

2497	2333

2498 » if (trans->state == NULL)	2334 if (trans->state == NULL)

2499 » (trans + 1)->state = NULL;	2335 (trans + 1)->state = NULL;

2500 » /* Use the character ranges, assertions, etc. from `p1' for	2336 /* Use the character ranges, assertions, etc. from `p1' for

2501 » the transition from `p1' to `p2'. */	2337 the transition from `p1' to `p2'. */

2502 » trans->code_min = p1->code_min;	2338 trans->code_min = p1->code_min;

2503 » trans->code_max = p1->code_max;	2339 trans->code_max = p1->code_max;

2504 » trans->state = transitions + offs[p2->position];	2340 trans->state = transitions + offs[p2->position];

2505 » trans->state_id = p2->position;	2341 trans->state_id = p2->position;

2506 » trans->assertions = p1->assertions \| p2->assertions	2342 trans->assertions =

2507 » \| (p1->class ? ASSERT_CHAR_CLASS : 0)	2343 p1->assertions \| p2->assertions \|

2508 » \| (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);	2344 (p1->class ? ASSERT_CHAR_CLASS : 0) \|

2509 » if (p1->backref >= 0)	2345 (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);

2510 » {	2346 if (p1->backref >= 0) {

2511 » » assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);	2347 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);

2512 » » assert(p2->backref < 0);	2348 assert(p2->backref < 0);

2513 » » trans->u.backref = p1->backref;	2349 trans->u.backref = p1->backref;

2514 » » trans->assertions \|= ASSERT_BACKREF;	2350 trans->assertions \|= ASSERT_BACKREF;

2515 » }	2351 } else

2516 » else	2352 trans->u.class = p1->class;

2517 » trans->u.class = p1->class;	2353 if (p1->neg_classes != NULL) {

2518 » if (p1->neg_classes != NULL)	2354 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)

2519 » {	2355 ;

2520 » » for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);	2356 trans->neg_classes = xmalloc(sizeof(trans->neg_classes) (i + 1));

2521 » » trans->neg_classes =	2357 if (trans->neg_classes == NULL)

2522 » » xmalloc(sizeof(trans->neg_classes) (i + 1));	2358 return REG_ESPACE;

2523 » » if (trans->neg_classes == NULL)	2359 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)

2524 » » return REG_ESPACE;	2360 trans->neg_classes[i] = p1->neg_classes[i];

2525 » » for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)	2361 trans->neg_classes[i] = (tre_ctype_t)0;

2526 » » trans->neg_classes[i] = p1->neg_classes[i];	2362 } else

2527 » » trans->neg_classes[i] = (tre_ctype_t)0;	2363 trans->neg_classes = NULL;

2528 » }

2529 » else

2530 » trans->neg_classes = NULL;

2531	2364

2532 » /* Find out how many tags this transition has. */	2365 /* Find out how many tags this transition has. */

2533 » i = 0;	2366 i = 0;

2534 » if (p1->tags != NULL)	2367 if (p1->tags != NULL)

2535 » while(p1->tags[i] >= 0)	2368 while (p1->tags[i] >= 0)

2536 » » i++;	2369 i++;

2537 » j = 0;	2370 j = 0;

2538 » if (p2->tags != NULL)	2371 if (p2->tags != NULL)

2539 » while(p2->tags[j] >= 0)	2372 while (p2->tags[j] >= 0)

2540 » » j++;	2373 j++;

2541	2374

2542 » /* If we are overwriting a transition, free the old tag array. */	2375 /* If we are overwriting a transition, free the old tag array. */

2543 » if (trans->tags != NULL)	2376 if (trans->tags != NULL)

2544 » xfree(trans->tags);	2377 xfree(trans->tags);

2545 » trans->tags = NULL;	2378 trans->tags = NULL;

2546	2379

2547 » /* If there were any tags, allocate an array and fill it. */	2380 /* If there were any tags, allocate an array and fill it. */

2548 » if (i + j > 0)	2381 if (i + j > 0) {

2549 » {	2382 trans->tags = xmalloc(sizeof(trans->tags) (i + j + 1));

2550 » » trans->tags = xmalloc(sizeof(trans->tags) (i + j + 1));	2383 if (!trans->tags)

2551 » » if (!trans->tags)	2384 return REG_ESPACE;

2552 » » return REG_ESPACE;	2385 i = 0;

2553 » » i = 0;	2386 if (p1->tags != NULL)

2554 » » if (p1->tags != NULL)	2387 while (p1->tags[i] >= 0) {

2555 » » while(p1->tags[i] >= 0)	2388 trans->tags[i] = p1->tags[i];

2556 » » {	2389 i++;

2557 » » trans->tags[i] = p1->tags[i];	2390 }

2558 » » i++;	2391 l = i;

2559 » » }	2392 j = 0;

2560 » » l = i;	2393 if (p2->tags != NULL)

2561 » » j = 0;	2394 while (p2->tags[j] >= 0) {

2562 » » if (p2->tags != NULL)	2395 /* Don't add duplicates. */

2563 » » while (p2->tags[j] >= 0)	2396 dup = 0;

2564 » » {	2397 for (k = 0; k < i; k++)

2565 » » /* Don't add duplicates. */	2398 if (trans->tags[k] == p2->tags[j]) {

2566 » » dup = 0;	2399 dup = 1;

2567 » » for (k = 0; k < i; k++)	2400 break;

2568 » » » if (trans->tags[k] == p2->tags[j])	2401 }

2569 » » » {	2402 if (!dup)

2570 » » » dup = 1;	2403 trans->tags[l++] = p2->tags[j];

2571 » » » break;	2404 j++;

2572 » » » }	2405 }

2573 » » if (!dup)	2406 trans->tags[l] = -1;

2574 » » » trans->tags[l++] = p2->tags[j];	2407 }

2575 » » j++;

2576 » » }

2577 » » trans->tags[l] = -1;

2578 » }

2579	2408

2580 » p2++;	2409 p2++;

2581 » }

2582 » p1++;

2583 }	2410 }

	2411 p1++;

	2412 }

2584 else	2413 else

2585 /* Compute a maximum limit for the number of transitions leaving	2414 /* Compute a maximum limit for the number of transitions leaving

2586 from each state. */	2415 from each state. */

2587 while (p1->position >= 0)	2416 while (p1->position >= 0) {

2588 {	2417 p2 = orig_p2;

2589 » p2 = orig_p2;	2418 while (p2->position >= 0) {

2590 » while (p2->position >= 0)	2419 counts[p1->position]++;

2591 » {	2420 p2++;

2592 » counts[p1->position]++;

2593 » p2++;

2594 » }

2595 » p1++;

2596 }	2421 }

	2422 p1++;

	2423 }

2597 return REG_OK;	2424 return REG_OK;

2598 }	2425 }

2599	2426

2600 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are	2427 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are

2601 labelled with one character range (there are no transitions on empty	2428 labelled with one character range (there are no transitions on empty

2602 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of	2429 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of

2603 the regexp. */	2430 the regexp. */

2604 static reg_errcode_t	2431 static reg_errcode_t tre_ast_to_tnfa(tre_ast_node_t* node,

2605 tre_ast_to_tnfa(tre_ast_node_t node, tre_tnfa_transition_t transitions,	2432 tre_tnfa_transition_t* transitions,

2606 » » int counts, int offs)	2433 int* counts,

2607 {	2434 int* offs) {

2608 tre_union_t *uni;	2435 tre_union_t* uni;

2609 tre_catenation_t *cat;	2436 tre_catenation_t* cat;

2610 tre_iteration_t *iter;	2437 tre_iteration_t* iter;

2611 reg_errcode_t errcode = REG_OK;	2438 reg_errcode_t errcode = REG_OK;

2612	2439

2613 /* XXX - recurse using a stack!. */	2440 /* XXX - recurse using a stack!. */

2614 switch (node->type)	2441 switch (node->type) {

2615 {

2616 case LITERAL:	2442 case LITERAL:

2617 break;	2443 break;

2618 case UNION:	2444 case UNION:

2619 uni = (tre_union_t *)node->obj;	2445 uni = (tre_union_t*)node->obj;

2620 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);	2446 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);

2621 if (errcode != REG_OK)	2447 if (errcode != REG_OK)

2622 » return errcode;	2448 return errcode;

2623 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);	2449 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);

2624 break;	2450 break;

2625	2451

2626 case CATENATION:	2452 case CATENATION:

2627 cat = (tre_catenation_t *)node->obj;	2453 cat = (tre_catenation_t*)node->obj;

2628 /* Add a transition from each position in cat->left->lastpos	2454 /* Add a transition from each position in cat->left->lastpos

2629 » to each position in cat->right->firstpos. */	2455 to each position in cat->right->firstpos. */

2630 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,	2456 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,

2631 » » » transitions, counts, offs);	2457 transitions, counts, offs);

2632 if (errcode != REG_OK)	2458 if (errcode != REG_OK)

2633 » return errcode;	2459 return errcode;

2634 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);	2460 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);

2635 if (errcode != REG_OK)	2461 if (errcode != REG_OK)

2636 » return errcode;	2462 return errcode;

2637 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);	2463 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);

2638 break;	2464 break;

2639	2465

2640 case ITERATION:	2466 case ITERATION:

2641 iter = (tre_iteration_t *)node->obj;	2467 iter = (tre_iteration_t*)node->obj;

2642 assert(iter->max == -1 \|\| iter->max == 1);	2468 assert(iter->max == -1 \|\| iter->max == 1);

2643	2469

2644 if (iter->max == -1)	2470 if (iter->max == -1) {

2645 » {	2471 assert(iter->min == 0 \|\| iter->min == 1);

2646 » assert(iter->min == 0 \|\| iter->min == 1);	2472 /* Add a transition from each last position in the iterated

2647 » /* Add a transition from each last position in the iterated	2473 expression to each first position. */

2648 » expression to each first position. */	2474 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,

2649 » errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,	2475 transitions, counts, offs);

2650 » » » » transitions, counts, offs);	2476 if (errcode != REG_OK)

2651 » if (errcode != REG_OK)	2477 return errcode;

2652 » return errcode;	2478 }

2653 » }

2654 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);	2479 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);

2655 break;	2480 break;

2656 }	2481 }

2657 return errcode;	2482 return errcode;

2658 }	2483 }

2659	2484

	2485 #define ERROR_EXIT(err) \

	2486 do { \

	2487 errcode = err; \

	2488 if (/CONSTCOND/ 1) \

	2489 goto error_exit; \

	2490 } while (/CONSTCOND/ 0)

2660	2491

2661 #define ERROR_EXIT(err)»» \	2492 int regcomp(regex_t* restrict preg, const char* restrict regex, int cflags) {

2662 do» » » » \	2493 tre_stack_t* stack;

2663 {» » » » \

2664 errcode = err;» » \

2665 if (/CONSTCOND/1)» \

2666 » goto error_exit;» \

2667 }» » » » \

2668 while (/CONSTCOND/0)

2669

2670

2671 int

2672 regcomp(regex_t restrict preg, const char restrict regex, int cflags)

2673 {

2674 tre_stack_t *stack;

2675 tre_ast_node_t tree, tmp_ast_l, *tmp_ast_r;	2494 tre_ast_node_t tree, tmp_ast_l, *tmp_ast_r;

2676 tre_pos_and_tags_t *p;	2495 tre_pos_and_tags_t* p;

2677 int counts = NULL, offs = NULL;	2496 int counts = NULL, offs = NULL;

2678 int i, add = 0;	2497 int i, add = 0;

2679 tre_tnfa_transition_t transitions, initial;	2498 tre_tnfa_transition_t transitions, initial;

2680 tre_tnfa_t *tnfa = NULL;	2499 tre_tnfa_t* tnfa = NULL;

2681 tre_submatch_data_t *submatch_data;	2500 tre_submatch_data_t* submatch_data;

2682 tre_tag_direction_t *tag_directions = NULL;	2501 tre_tag_direction_t* tag_directions = NULL;

2683 reg_errcode_t errcode;	2502 reg_errcode_t errcode;

2684 tre_mem_t mem;	2503 tre_mem_t mem;

2685	2504

2686 /* Parse context. */	2505 /* Parse context. */

2687 tre_parse_ctx_t parse_ctx;	2506 tre_parse_ctx_t parse_ctx;

2688	2507

2689 /* Allocate a stack used throughout the compilation process for various	2508 /* Allocate a stack used throughout the compilation process for various

2690 purposes. */	2509 purposes. */

2691 stack = tre_stack_new(512, 1024000, 128);	2510 stack = tre_stack_new(512, 1024000, 128);

2692 if (!stack)	2511 if (!stack)

2693 return REG_ESPACE;	2512 return REG_ESPACE;

2694 /* Allocate a fast memory allocator. */	2513 /* Allocate a fast memory allocator. */

2695 mem = tre_mem_new();	2514 mem = tre_mem_new();

2696 if (!mem)	2515 if (!mem) {

2697 {	2516 tre_stack_destroy(stack);

2698 tre_stack_destroy(stack);	2517 return REG_ESPACE;

2699 return REG_ESPACE;	2518 }

2700 }

2701	2519

2702 /* Parse the regexp. */	2520 /* Parse the regexp. */

2703 memset(&parse_ctx, 0, sizeof(parse_ctx));	2521 memset(&parse_ctx, 0, sizeof(parse_ctx));

2704 parse_ctx.mem = mem;	2522 parse_ctx.mem = mem;

2705 parse_ctx.stack = stack;	2523 parse_ctx.stack = stack;

2706 parse_ctx.re = regex;	2524 parse_ctx.re = regex;

2707 parse_ctx.cflags = cflags;	2525 parse_ctx.cflags = cflags;

2708 parse_ctx.max_backref = -1;	2526 parse_ctx.max_backref = -1;

2709 errcode = tre_parse(&parse_ctx);	2527 errcode = tre_parse(&parse_ctx);

2710 if (errcode != REG_OK)	2528 if (errcode != REG_OK)

(...skipping 12 matching lines...) Expand all Loading...
2723 /* Allocate the TNFA struct. */	2541 /* Allocate the TNFA struct. */

2724 tnfa = xcalloc(1, sizeof(tre_tnfa_t));	2542 tnfa = xcalloc(1, sizeof(tre_tnfa_t));

2725 if (tnfa == NULL)	2543 if (tnfa == NULL)

2726 ERROR_EXIT(REG_ESPACE);	2544 ERROR_EXIT(REG_ESPACE);

2727 tnfa->have_backrefs = parse_ctx.max_backref >= 0;	2545 tnfa->have_backrefs = parse_ctx.max_backref >= 0;

2728 tnfa->have_approx = 0;	2546 tnfa->have_approx = 0;

2729 tnfa->num_submatches = parse_ctx.submatch_id;	2547 tnfa->num_submatches = parse_ctx.submatch_id;

2730	2548

2731 /* Set up tags for submatch addressing. If REG_NOSUB is set and the	2549 /* Set up tags for submatch addressing. If REG_NOSUB is set and the

2732 regexp does not have back references, this can be skipped. */	2550 regexp does not have back references, this can be skipped. */

2733 if (tnfa->have_backrefs \|\| !(cflags & REG_NOSUB))	2551 if (tnfa->have_backrefs \|\| !(cflags & REG_NOSUB)) {

2734 {	2552 /* Figure out how many tags we will need. */

	2553 errcode = tre_add_tags(NULL, stack, tree, tnfa);

	2554 if (errcode != REG_OK)

	2555 ERROR_EXIT(errcode);

2735	2556

2736 /* Figure out how many tags we will need. */	2557 if (tnfa->num_tags > 0) {

2737 errcode = tre_add_tags(NULL, stack, tree, tnfa);	2558 tag_directions = xmalloc(sizeof(tag_directions) (tnfa->num_tags + 1));

2738 if (errcode != REG_OK)	2559 if (tag_directions == NULL)

2739 » ERROR_EXIT(errcode);	2560 ERROR_EXIT(REG_ESPACE);

	2561 tnfa->tag_directions = tag_directions;

	2562 memset(tag_directions, -1,

	2563 sizeof(tag_directions) (tnfa->num_tags + 1));

	2564 }

	2565 tnfa->minimal_tags =

	2566 xcalloc((unsigned)tnfa->num_tags * 2 + 1, sizeof(*tnfa->minimal_tags));

	2567 if (tnfa->minimal_tags == NULL)

	2568 ERROR_EXIT(REG_ESPACE);

2740	2569

2741 if (tnfa->num_tags > 0)	2570 submatch_data =

2742 » {	2571 xcalloc((unsigned)parse_ctx.submatch_id, sizeof(*submatch_data));

2743 » tag_directions = xmalloc(sizeof(*tag_directions)	2572 if (submatch_data == NULL)

2744 » » » » * (tnfa->num_tags + 1));	2573 ERROR_EXIT(REG_ESPACE);

2745 » if (tag_directions == NULL)	2574 tnfa->submatch_data = submatch_data;

2746 » ERROR_EXIT(REG_ESPACE);

2747 » tnfa->tag_directions = tag_directions;

2748 » memset(tag_directions, -1,

2749 » » sizeof(tag_directions) (tnfa->num_tags + 1));

2750 » }

2751 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,

2752 » » » » sizeof(*tnfa->minimal_tags));

2753 if (tnfa->minimal_tags == NULL)

2754 » ERROR_EXIT(REG_ESPACE);

2755	2575

2756 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,	2576 errcode = tre_add_tags(mem, stack, tree, tnfa);

2757 » » » sizeof(*submatch_data));	2577 if (errcode != REG_OK)

2758 if (submatch_data == NULL)	2578 ERROR_EXIT(errcode);

2759 » ERROR_EXIT(REG_ESPACE);	2579 }

2760 tnfa->submatch_data = submatch_data;

2761

2762 errcode = tre_add_tags(mem, stack, tree, tnfa);

2763 if (errcode != REG_OK)

2764 » ERROR_EXIT(errcode);

2765

2766 }

2767	2580

2768 /* Expand iteration nodes. */	2581 /* Expand iteration nodes. */

2769 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,	2582 errcode =

2770 » » » tag_directions);	2583 tre_expand_ast(mem, stack, tree, &parse_ctx.position, tag_directions);

2771 if (errcode != REG_OK)	2584 if (errcode != REG_OK)

2772 ERROR_EXIT(errcode);	2585 ERROR_EXIT(errcode);

2773	2586

2774 /* Add a dummy node for the final state.	2587 /* Add a dummy node for the final state.

2775 XXX - For certain patterns this dummy node can be optimized away,	2588 XXX - For certain patterns this dummy node can be optimized away,

2776 » for example "a" or "ab".» Figure out a simple way to detect	2589 for example "a" or "ab".» Figure out a simple way to detect

2777 » this possibility. */	2590 this possibility. */

2778 tmp_ast_l = tree;	2591 tmp_ast_l = tree;

2779 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);	2592 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);

2780 if (tmp_ast_r == NULL)	2593 if (tmp_ast_r == NULL)

2781 ERROR_EXIT(REG_ESPACE);	2594 ERROR_EXIT(REG_ESPACE);

2782	2595

2783 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);	2596 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);

2784 if (tree == NULL)	2597 if (tree == NULL)

2785 ERROR_EXIT(REG_ESPACE);	2598 ERROR_EXIT(REG_ESPACE);

2786	2599

2787 errcode = tre_compute_nfl(mem, stack, tree);	2600 errcode = tre_compute_nfl(mem, stack, tree);

2788 if (errcode != REG_OK)	2601 if (errcode != REG_OK)

2789 ERROR_EXIT(errcode);	2602 ERROR_EXIT(errcode);

2790	2603

2791 counts = xmalloc(sizeof(int) * parse_ctx.position);	2604 counts = xmalloc(sizeof(int) * parse_ctx.position);

2792 if (counts == NULL)	2605 if (counts == NULL)

2793 ERROR_EXIT(REG_ESPACE);	2606 ERROR_EXIT(REG_ESPACE);

2794	2607

2795 offs = xmalloc(sizeof(int) * parse_ctx.position);	2608 offs = xmalloc(sizeof(int) * parse_ctx.position);

2796 if (offs == NULL)	2609 if (offs == NULL)

2797 ERROR_EXIT(REG_ESPACE);	2610 ERROR_EXIT(REG_ESPACE);

2798	2611

2799 for (i = 0; i < parse_ctx.position; i++)	2612 for (i = 0; i < parse_ctx.position; i++)

2800 counts[i] = 0;	2613 counts[i] = 0;

2801 tre_ast_to_tnfa(tree, NULL, counts, NULL);	2614 tre_ast_to_tnfa(tree, NULL, counts, NULL);

2802	2615

2803 add = 0;	2616 add = 0;

2804 for (i = 0; i < parse_ctx.position; i++)	2617 for (i = 0; i < parse_ctx.position; i++) {

2805 {	2618 offs[i] = add;

2806 offs[i] = add;	2619 add += counts[i] + 1;

2807 add += counts[i] + 1;	2620 counts[i] = 0;

2808 counts[i] = 0;	2621 }

2809 }

2810 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));	2622 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));

2811 if (transitions == NULL)	2623 if (transitions == NULL)

2812 ERROR_EXIT(REG_ESPACE);	2624 ERROR_EXIT(REG_ESPACE);

2813 tnfa->transitions = transitions;	2625 tnfa->transitions = transitions;

2814 tnfa->num_transitions = add;	2626 tnfa->num_transitions = add;

2815	2627

2816 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);	2628 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);

2817 if (errcode != REG_OK)	2629 if (errcode != REG_OK)

2818 ERROR_EXIT(errcode);	2630 ERROR_EXIT(errcode);

2819	2631

2820 tnfa->firstpos_chars = NULL;	2632 tnfa->firstpos_chars = NULL;

2821	2633

2822 p = tree->firstpos;	2634 p = tree->firstpos;

2823 i = 0;	2635 i = 0;

2824 while (p->position >= 0)	2636 while (p->position >= 0) {

2825 {	2637 i++;

2826 i++;	2638 p++;

2827 p++;	2639 }

2828 }

2829	2640

2830 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));	2641 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));

2831 if (initial == NULL)	2642 if (initial == NULL)

2832 ERROR_EXIT(REG_ESPACE);	2643 ERROR_EXIT(REG_ESPACE);

2833 tnfa->initial = initial;	2644 tnfa->initial = initial;

2834	2645

2835 i = 0;	2646 i = 0;

2836 for (p = tree->firstpos; p->position >= 0; p++)	2647 for (p = tree->firstpos; p->position >= 0; p++) {

2837 {	2648 initial[i].state = transitions + offs[p->position];

2838 initial[i].state = transitions + offs[p->position];	2649 initial[i].state_id = p->position;

2839 initial[i].state_id = p->position;	2650 initial[i].tags = NULL;

2840 initial[i].tags = NULL;	2651 /* Copy the arrays p->tags, and p->params, they are allocated

2841 /* Copy the arrays p->tags, and p->params, they are allocated	2652 from a tre_mem object. */

2842 » from a tre_mem object. */	2653 if (p->tags) {

2843 if (p->tags)	2654 int j;

2844 » {	2655 for (j = 0; p->tags[j] >= 0; j++)

2845 » int j;	2656 ;

2846 » for (j = 0; p->tags[j] >= 0; j++);	2657 initial[i].tags = xmalloc(sizeof(p->tags) (j + 1));

2847 » initial[i].tags = xmalloc(sizeof(p->tags) (j + 1));	2658 if (!initial[i].tags)

2848 » if (!initial[i].tags)	2659 ERROR_EXIT(REG_ESPACE);

2849 » ERROR_EXIT(REG_ESPACE);	2660 memcpy(initial[i].tags, p->tags, sizeof(p->tags) (j + 1));

2850 » memcpy(initial[i].tags, p->tags, sizeof(p->tags) (j + 1));

2851 » }

2852 initial[i].assertions = p->assertions;

2853 i++;

2854 }	2661 }

	2662 initial[i].assertions = p->assertions;

	2663 i++;

	2664 }

2855 initial[i].state = NULL;	2665 initial[i].state = NULL;

2856	2666

2857 tnfa->num_transitions = add;	2667 tnfa->num_transitions = add;

2858 tnfa->final = transitions + offs[tree->lastpos[0].position];	2668 tnfa->final = transitions + offs[tree->lastpos[0].position];

2859 tnfa->num_states = parse_ctx.position;	2669 tnfa->num_states = parse_ctx.position;

2860 tnfa->cflags = cflags;	2670 tnfa->cflags = cflags;

2861	2671

2862 tre_mem_destroy(mem);	2672 tre_mem_destroy(mem);

2863 tre_stack_destroy(stack);	2673 tre_stack_destroy(stack);

2864 xfree(counts);	2674 xfree(counts);

2865 xfree(offs);	2675 xfree(offs);

2866	2676

2867 preg->TRE_REGEX_T_FIELD = (void *)tnfa;	2677 preg->TRE_REGEX_T_FIELD = (void*)tnfa;

2868 return REG_OK;	2678 return REG_OK;

2869	2679

2870 error_exit:	2680 error_exit:

2871 /* Free everything that was allocated and return the error code. */	2681 /* Free everything that was allocated and return the error code. */

2872 tre_mem_destroy(mem);	2682 tre_mem_destroy(mem);

2873 if (stack != NULL)	2683 if (stack != NULL)

2874 tre_stack_destroy(stack);	2684 tre_stack_destroy(stack);

2875 if (counts != NULL)	2685 if (counts != NULL)

2876 xfree(counts);	2686 xfree(counts);

2877 if (offs != NULL)	2687 if (offs != NULL)

2878 xfree(offs);	2688 xfree(offs);

2879 preg->TRE_REGEX_T_FIELD = (void *)tnfa;	2689 preg->TRE_REGEX_T_FIELD = (void*)tnfa;

2880 regfree(preg);	2690 regfree(preg);

2881 return errcode;	2691 return errcode;

2882 }	2692 }

2883	2693

	2694 void regfree(regex_t* preg) {

	2695 tre_tnfa_t* tnfa;

	2696 unsigned int i;

	2697 tre_tnfa_transition_t* trans;

2884	2698

2885	2699 tnfa = (void*)preg->TRE_REGEX_T_FIELD;

2886

2887 void

2888 regfree(regex_t *preg)

2889 {

2890 tre_tnfa_t *tnfa;

2891 unsigned int i;

2892 tre_tnfa_transition_t *trans;

2893

2894 tnfa = (void *)preg->TRE_REGEX_T_FIELD;

2895 if (!tnfa)	2700 if (!tnfa)

2896 return;	2701 return;

2897	2702

2898 for (i = 0; i < tnfa->num_transitions; i++)	2703 for (i = 0; i < tnfa->num_transitions; i++)

2899 if (tnfa->transitions[i].state)	2704 if (tnfa->transitions[i].state) {

2900 {	2705 if (tnfa->transitions[i].tags)

2901 » if (tnfa->transitions[i].tags)	2706 xfree(tnfa->transitions[i].tags);

2902 » xfree(tnfa->transitions[i].tags);	2707 if (tnfa->transitions[i].neg_classes)

2903 » if (tnfa->transitions[i].neg_classes)	2708 xfree(tnfa->transitions[i].neg_classes);

2904 » xfree(tnfa->transitions[i].neg_classes);	2709 }

2905 }

2906 if (tnfa->transitions)	2710 if (tnfa->transitions)

2907 xfree(tnfa->transitions);	2711 xfree(tnfa->transitions);

2908	2712

2909 if (tnfa->initial)	2713 if (tnfa->initial) {

2910 {	2714 for (trans = tnfa->initial; trans->state; trans++) {

2911 for (trans = tnfa->initial; trans->state; trans++)	2715 if (trans->tags)

2912 » {	2716 xfree(trans->tags);

2913 » if (trans->tags)

2914 » xfree(trans->tags);

2915 » }

2916 xfree(tnfa->initial);

2917 }	2717 }

	2718 xfree(tnfa->initial);

	2719 }

2918	2720

2919 if (tnfa->submatch_data)	2721 if (tnfa->submatch_data) {

2920 {	2722 for (i = 0; i < tnfa->num_submatches; i++)

2921 for (i = 0; i < tnfa->num_submatches; i++)	2723 if (tnfa->submatch_data[i].parents)

2922 » if (tnfa->submatch_data[i].parents)	2724 xfree(tnfa->submatch_data[i].parents);

2923 » xfree(tnfa->submatch_data[i].parents);	2725 xfree(tnfa->submatch_data);

2924 xfree(tnfa->submatch_data);	2726 }

2925 }

2926	2727

2927 if (tnfa->tag_directions)	2728 if (tnfa->tag_directions)

2928 xfree(tnfa->tag_directions);	2729 xfree(tnfa->tag_directions);

2929 if (tnfa->firstpos_chars)	2730 if (tnfa->firstpos_chars)

2930 xfree(tnfa->firstpos_chars);	2731 xfree(tnfa->firstpos_chars);

2931 if (tnfa->minimal_tags)	2732 if (tnfa->minimal_tags)

2932 xfree(tnfa->minimal_tags);	2733 xfree(tnfa->minimal_tags);

2933 xfree(tnfa);	2734 xfree(tnfa);

2934 }	2735 }

OLD	NEW

« fusl/arch/aarch64/atomic_arch.h ('K') | « fusl/src/regex/glob.c ('k') | fusl/src/regex/regerror.c » ('j') | no next file with comments »