Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: fusl/src/regex/regcomp.c

Issue 1706293003: [fusl] Remove some more tabs (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: apptest rebase Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « fusl/src/network/lookup_name.c ('k') | fusl/src/regex/regexec.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 regcomp.c - TRE POSIX compatible regex compilation functions. 2 regcomp.c - TRE POSIX compatible regex compilation functions.
3 3
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5 All rights reserved. 5 All rights reserved.
6 6
7 Redistribution and use in source and binary forms, with or without 7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions 8 modification, are permitted provided that the following conditions
9 are met: 9 are met:
10 10
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 tags, matching parameter settings, and all expressions that match one 91 tags, matching parameter settings, and all expressions that match one
92 character. */ 92 character. */
93 typedef struct { 93 typedef struct {
94 long code_min; 94 long code_min;
95 long code_max; 95 long code_max;
96 int position; 96 int position;
97 tre_ctype_t class; 97 tre_ctype_t class;
98 tre_ctype_t* neg_classes; 98 tre_ctype_t* neg_classes;
99 } tre_literal_t; 99 } tre_literal_t;
100 100
101 /* A "catenation" node.» These are created when two regexps are concatenated. 101 /* A "catenation" node. These are created when two regexps are concatenated.
102 If there are more than one subexpressions in sequence, the `left' part 102 If there are more than one subexpressions in sequence, the `left' part
103 holds all but the last, and `right' part holds the last subexpression 103 holds all but the last, and `right' part holds the last subexpression
104 (catenation is left associative). */ 104 (catenation is left associative). */
105 typedef struct { 105 typedef struct {
106 tre_ast_node_t* left; 106 tre_ast_node_t* left;
107 tre_ast_node_t* right; 107 tre_ast_node_t* right;
108 } tre_catenation_t; 108 } tre_catenation_t;
109 109
110 /* An "iteration" node.» These are created for the "*", "+", "?", and "{m,n}" 110 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
111 operators. */ 111 operators. */
112 typedef struct { 112 typedef struct {
113 /* Subexpression to match. */ 113 /* Subexpression to match. */
114 tre_ast_node_t* arg; 114 tre_ast_node_t* arg;
115 /* Minimum number of consecutive matches. */ 115 /* Minimum number of consecutive matches. */
116 int min; 116 int min;
117 /* Maximum number of consecutive matches. */ 117 /* Maximum number of consecutive matches. */
118 int max; 118 int max;
119 /* If 0, match as many characters as possible, if 1 match as few as 119 /* If 0, match as many characters as possible, if 1 match as few as
120 possible.» Note that this does not always mean the same thing as 120 possible. Note that this does not always mean the same thing as
121 matching as many/few repetitions as possible. */ 121 matching as many/few repetitions as possible. */
122 unsigned int minimal : 1; 122 unsigned int minimal : 1;
123 } tre_iteration_t; 123 } tre_iteration_t;
124 124
125 /* An "union" node. These are created for the "|" operator. */ 125 /* An "union" node. These are created for the "|" operator. */
126 typedef struct { 126 typedef struct {
127 tre_ast_node_t* left; 127 tre_ast_node_t* left;
128 tre_ast_node_t* right; 128 tre_ast_node_t* right;
129 } tre_union_t; 129 } tre_union_t;
130 130
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
211 node->num_submatches = left->num_submatches + right->num_submatches; 211 node->num_submatches = left->num_submatches + right->num_submatches;
212 return node; 212 return node;
213 } 213 }
214 214
215 /*********************************************************************** 215 /***********************************************************************
216 from tre-stack.c and tre-stack.h 216 from tre-stack.c and tre-stack.h
217 ***********************************************************************/ 217 ***********************************************************************/
218 218
219 typedef struct tre_stack_rec tre_stack_t; 219 typedef struct tre_stack_rec tre_stack_t;
220 220
221 /* Creates a new stack object.» `size' is initial size in bytes, `max_size' 221 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
222 is maximum size, and `increment' specifies how much more space will be 222 is maximum size, and `increment' specifies how much more space will be
223 allocated with realloc() if all space gets used up.» Returns the stack 223 allocated with realloc() if all space gets used up. Returns the stack
224 object or NULL if out of memory. */ 224 object or NULL if out of memory. */
225 static tre_stack_t* tre_stack_new(int size, int max_size, int increment); 225 static tre_stack_t* tre_stack_new(int size, int max_size, int increment);
226 226
227 /* Frees the stack object. */ 227 /* Frees the stack object. */
228 static void tre_stack_destroy(tre_stack_t* s); 228 static void tre_stack_destroy(tre_stack_t* s);
229 229
230 /* Returns the current number of objects in the stack. */ 230 /* Returns the current number of objects in the stack. */
231 static int tre_stack_num_objects(tre_stack_t* s); 231 static int tre_stack_num_objects(tre_stack_t* s);
232 232
233 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes 233 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
(...skipping 1245 matching lines...) Expand 10 before | Expand all | Expand 10 after
1479 case ADDTAGS_AFTER_CAT_RIGHT: 1479 case ADDTAGS_AFTER_CAT_RIGHT:
1480 node = tre_stack_pop_voidptr(stack); 1480 node = tre_stack_pop_voidptr(stack);
1481 if (first_pass) 1481 if (first_pass)
1482 node->num_tags = ((tre_catenation_t*)node->obj)->left->num_tags + 1482 node->num_tags = ((tre_catenation_t*)node->obj)->left->num_tags +
1483 ((tre_catenation_t*)node->obj)->right->num_tags; 1483 ((tre_catenation_t*)node->obj)->right->num_tags;
1484 break; 1484 break;
1485 1485
1486 case ADDTAGS_AFTER_UNION_LEFT: 1486 case ADDTAGS_AFTER_UNION_LEFT:
1487 /* Lift the bottom of the `regset' array so that when processing 1487 /* Lift the bottom of the `regset' array so that when processing
1488 the right operand the items currently in the array are 1488 the right operand the items currently in the array are
1489 invisible.» The original bottom was saved at ADDTAGS_UNION and 1489 invisible. The original bottom was saved at ADDTAGS_UNION and
1490 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */ 1490 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1491 while (*regset >= 0) 1491 while (*regset >= 0)
1492 regset++; 1492 regset++;
1493 break; 1493 break;
1494 1494
1495 case ADDTAGS_AFTER_UNION_RIGHT: { 1495 case ADDTAGS_AFTER_UNION_RIGHT: {
1496 int added_tags, tag_left, tag_right; 1496 int added_tags, tag_left, tag_right;
1497 tre_ast_node_t* left = tre_stack_pop_voidptr(stack); 1497 tre_ast_node_t* left = tre_stack_pop_voidptr(stack);
1498 tre_ast_node_t* right = tre_stack_pop_voidptr(stack); 1498 tre_ast_node_t* right = tre_stack_pop_voidptr(stack);
1499 node = tre_stack_pop_voidptr(stack); 1499 node = tre_stack_pop_voidptr(stack);
1500 added_tags = tre_stack_pop_int(stack); 1500 added_tags = tre_stack_pop_int(stack);
1501 if (first_pass) { 1501 if (first_pass) {
1502 node->num_tags = ((tre_union_t*)node->obj)->left->num_tags + 1502 node->num_tags = ((tre_union_t*)node->obj)->left->num_tags +
1503 ((tre_union_t*)node->obj)->right->num_tags + 1503 ((tre_union_t*)node->obj)->right->num_tags +
1504 added_tags + ((node->num_submatches > 0) ? 2 : 0); 1504 added_tags + ((node->num_submatches > 0) ? 2 : 0);
1505 } 1505 }
1506 regset = tre_stack_pop_voidptr(stack); 1506 regset = tre_stack_pop_voidptr(stack);
1507 tag_left = tre_stack_pop_int(stack); 1507 tag_left = tre_stack_pop_int(stack);
1508 tag_right = tre_stack_pop_int(stack); 1508 tag_right = tre_stack_pop_int(stack);
1509 1509
1510 /* Add tags after both children, the left child gets a smaller 1510 /* Add tags after both children, the left child gets a smaller
1511 tag than the right child. This guarantees that we prefer 1511 tag than the right child. This guarantees that we prefer
1512 the left child over the right child. */ 1512 the left child over the right child. */
1513 /* XXX - This is not always necessary (if the children have 1513 /* XXX - This is not always necessary (if the children have
1514 tags which must be seen for every match of that child). */ 1514 tags which must be seen for every match of that child). */
1515 /* XXX - Check if this is the only place where tre_add_tag_right 1515 /* XXX - Check if this is the only place where tre_add_tag_right
1516 is used.» If so, use tre_add_tag_left (putting the tag before 1516 is used. If so, use tre_add_tag_left (putting the tag before
1517 the child as opposed after the child) and throw away 1517 the child as opposed after the child) and throw away
1518 tre_add_tag_right. */ 1518 tre_add_tag_right. */
1519 if (node->num_submatches > 0) { 1519 if (node->num_submatches > 0) {
1520 if (!first_pass) { 1520 if (!first_pass) {
1521 status = tre_add_tag_right(mem, left, tag_left); 1521 status = tre_add_tag_right(mem, left, tag_left);
1522 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; 1522 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1523 if (status == REG_OK) 1523 if (status == REG_OK)
1524 status = tre_add_tag_right(mem, right, tag_right); 1524 status = tre_add_tag_right(mem, right, tag_right);
1525 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; 1525 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1526 } 1526 }
(...skipping 777 matching lines...) Expand 10 before | Expand all | Expand 10 after
2304 /* Optimization: if this position was already handled, skip it. */ 2304 /* Optimization: if this position was already handled, skip it. */
2305 if (p2->position == prev_p2_pos) { 2305 if (p2->position == prev_p2_pos) {
2306 p2++; 2306 p2++;
2307 continue; 2307 continue;
2308 } 2308 }
2309 prev_p2_pos = p2->position; 2309 prev_p2_pos = p2->position;
2310 /* Set `trans' to point to the next unused transition from 2310 /* Set `trans' to point to the next unused transition from
2311 position `p1->position'. */ 2311 position `p1->position'. */
2312 trans = transitions + offs[p1->position]; 2312 trans = transitions + offs[p1->position];
2313 while (trans->state != NULL) { 2313 while (trans->state != NULL) {
2314 #if 0
2315 /* If we find a previous transition from `p1->position' to
2316 `p2->position', it is overwritten. This can happen only
2317 if there are nested loops in the regexp, like in "((a)*)*".
2318 In POSIX.2 repetition using the outer loop is always
2319 preferred over using the inner loop. Therefore the
2320 transition for the inner loop is useless and can be thrown
2321 away. */
2322 /* XXX - The same position is used for all nodes in a bracket
2323 expression, so this optimization cannot be used (it will
2324 break bracket expressions) unless I figure out a way to
2325 detect it here. */
2326 if (trans->state_id == p2->position)
2327 {
2328 break;
2329 }
2330 #endif
2331 trans++; 2314 trans++;
2332 } 2315 }
2333 2316
2334 if (trans->state == NULL) 2317 if (trans->state == NULL)
2335 (trans + 1)->state = NULL; 2318 (trans + 1)->state = NULL;
2336 /* Use the character ranges, assertions, etc. from `p1' for 2319 /* Use the character ranges, assertions, etc. from `p1' for
2337 the transition from `p1' to `p2'. */ 2320 the transition from `p1' to `p2'. */
2338 trans->code_min = p1->code_min; 2321 trans->code_min = p1->code_min;
2339 trans->code_max = p1->code_max; 2322 trans->code_max = p1->code_max;
2340 trans->state = transitions + offs[p2->position]; 2323 trans->state = transitions + offs[p2->position];
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
2417 p2 = orig_p2; 2400 p2 = orig_p2;
2418 while (p2->position >= 0) { 2401 while (p2->position >= 0) {
2419 counts[p1->position]++; 2402 counts[p1->position]++;
2420 p2++; 2403 p2++;
2421 } 2404 }
2422 p1++; 2405 p1++;
2423 } 2406 }
2424 return REG_OK; 2407 return REG_OK;
2425 } 2408 }
2426 2409
2427 /* Converts the syntax tree to a TNFA.» All the transitions in the TNFA are 2410 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2428 labelled with one character range (there are no transitions on empty 2411 labelled with one character range (there are no transitions on empty
2429 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of 2412 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2430 the regexp. */ 2413 the regexp. */
2431 static reg_errcode_t tre_ast_to_tnfa(tre_ast_node_t* node, 2414 static reg_errcode_t tre_ast_to_tnfa(tre_ast_node_t* node,
2432 tre_tnfa_transition_t* transitions, 2415 tre_tnfa_transition_t* transitions,
2433 int* counts, 2416 int* counts,
2434 int* offs) { 2417 int* offs) {
2435 tre_union_t* uni; 2418 tre_union_t* uni;
2436 tre_catenation_t* cat; 2419 tre_catenation_t* cat;
2437 tre_iteration_t* iter; 2420 tre_iteration_t* iter;
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
2579 } 2562 }
2580 2563
2581 /* Expand iteration nodes. */ 2564 /* Expand iteration nodes. */
2582 errcode = 2565 errcode =
2583 tre_expand_ast(mem, stack, tree, &parse_ctx.position, tag_directions); 2566 tre_expand_ast(mem, stack, tree, &parse_ctx.position, tag_directions);
2584 if (errcode != REG_OK) 2567 if (errcode != REG_OK)
2585 ERROR_EXIT(errcode); 2568 ERROR_EXIT(errcode);
2586 2569
2587 /* Add a dummy node for the final state. 2570 /* Add a dummy node for the final state.
2588 XXX - For certain patterns this dummy node can be optimized away, 2571 XXX - For certain patterns this dummy node can be optimized away,
2589 for example "a*" or "ab*".» Figure out a simple way to detect 2572 for example "a*" or "ab*". Figure out a simple way to detect
2590 this possibility. */ 2573 this possibility. */
2591 tmp_ast_l = tree; 2574 tmp_ast_l = tree;
2592 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++); 2575 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2593 if (tmp_ast_r == NULL) 2576 if (tmp_ast_r == NULL)
2594 ERROR_EXIT(REG_ESPACE); 2577 ERROR_EXIT(REG_ESPACE);
2595 2578
2596 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); 2579 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2597 if (tree == NULL) 2580 if (tree == NULL)
2598 ERROR_EXIT(REG_ESPACE); 2581 ERROR_EXIT(REG_ESPACE);
2599 2582
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
2726 } 2709 }
2727 2710
2728 if (tnfa->tag_directions) 2711 if (tnfa->tag_directions)
2729 xfree(tnfa->tag_directions); 2712 xfree(tnfa->tag_directions);
2730 if (tnfa->firstpos_chars) 2713 if (tnfa->firstpos_chars)
2731 xfree(tnfa->firstpos_chars); 2714 xfree(tnfa->firstpos_chars);
2732 if (tnfa->minimal_tags) 2715 if (tnfa->minimal_tags)
2733 xfree(tnfa->minimal_tags); 2716 xfree(tnfa->minimal_tags);
2734 xfree(tnfa); 2717 xfree(tnfa);
2735 } 2718 }
OLDNEW
« no previous file with comments | « fusl/src/network/lookup_name.c ('k') | fusl/src/regex/regexec.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698