Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(226)

Side by Side Diff: fusl/src/regex/regcomp.c

Issue 1689833004: [fusl] Update fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: remove stray space Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « fusl/src/network/resolvconf.c ('k') | fusl/src/stdio/fread.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 regcomp.c - TRE POSIX compatible regex compilation functions. 2 regcomp.c - TRE POSIX compatible regex compilation functions.
3 3
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5 All rights reserved. 5 All rights reserved.
6 6
7 Redistribution and use in source and binary forms, with or without 7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions 8 modification, are permitted provided that the following conditions
9 are met: 9 are met:
10 10
(...skipping 690 matching lines...) Expand 10 before | Expand all | Expand 10 after
701 *n = 0; 701 *n = 0;
702 for (;;) { 702 for (;;) {
703 *n = 10 * *n + (*s - '0'); 703 *n = 10 * *n + (*s - '0');
704 s++; 704 s++;
705 if (!isdigit(*s) || *n > RE_DUP_MAX) 705 if (!isdigit(*s) || *n > RE_DUP_MAX)
706 break; 706 break;
707 } 707 }
708 return s; 708 return s;
709 } 709 }
710 710
711 static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s) 711 static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
712 { 712 {
713 int min, max; 713 int min, max;
714 714
715 s = parse_dup_count(s, &min); 715 s = parse_dup_count(s, &min);
716 if (*s == ',') 716 if (*s == ',')
717 s = parse_dup_count(s+1, &max); 717 s = parse_dup_count(s+1, &max);
718 else 718 else
719 max = min; 719 max = min;
720 720
721 if ( 721 if (
722 (max < min && max >= 0) || 722 (max < min && max >= 0) ||
723 max > RE_DUP_MAX || 723 max > RE_DUP_MAX ||
724 min > RE_DUP_MAX || 724 min > RE_DUP_MAX ||
725 min < 0 || 725 min < 0 ||
726 » » (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') || 726 » » (!ere && *s++ != '\\') ||
727 *s++ != '}' 727 *s++ != '}'
728 ) 728 )
729 » » return REG_BADBR; 729 » » return 0;
730 730 » *pmin = min;
731 » if (min == 0 && max == 0) 731 » *pmax = max;
732 » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 732 » return s;
733 » else
734 » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
735 » if (!ctx->n)
736 » » return REG_ESPACE;
737 » ctx->s = s;
738 » return REG_OK;
739 } 733 }
740 734
741 static int hexval(unsigned c) 735 static int hexval(unsigned c)
742 { 736 {
743 if (c-'0'<10) return c-'0'; 737 if (c-'0'<10) return c-'0';
744 c |= 32; 738 c |= 32;
745 if (c-'a'<6) return c-'a'+10; 739 if (c-'a'<6) return c-'a'+10;
746 return -1; 740 return -1;
747 } 741 }
748 742
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
827 c = hexval(s[i]); 821 c = hexval(s[i]);
828 if (c < 0) break; 822 if (c < 0) break;
829 v = 16*v + c; 823 v = 16*v + c;
830 } 824 }
831 s += i; 825 s += i;
832 if (len == 8) { 826 if (len == 8) {
833 if (*s != '}') 827 if (*s != '}')
834 return REG_EBRACE; 828 return REG_EBRACE;
835 s++; 829 s++;
836 } 830 }
837 » » » node = tre_ast_new_literal(ctx->mem, v, v, ctx->position ); 831 » » » node = tre_ast_new_literal(ctx->mem, v, v, ctx->position ++);
838 » » » ctx->position++;
839 s--; 832 s--;
840 break; 833 break;
834 case '{':
835 case '+':
836 case '?':
837 /* extension: treat \+, \? as repetitions in BRE */
838 /* reject repetitions after empty expression in BRE */
839 if (!ere)
840 return REG_BADRPT;
841 case '|':
842 /* extension: treat \| as alternation in BRE */
843 if (!ere) {
844 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
845 s--;
846 goto end;
847 }
848 /* fallthrough */
841 default: 849 default:
842 if (!ere && (unsigned)*s-'1' < 9) { 850 if (!ere && (unsigned)*s-'1' < 9) {
843 /* back reference */ 851 /* back reference */
844 int val = *s - '0'; 852 int val = *s - '0';
845 » » » » node = tre_ast_new_literal(ctx->mem, BACKREF, va l, ctx->position); 853 » » » » node = tre_ast_new_literal(ctx->mem, BACKREF, va l, ctx->position++);
846 ctx->max_backref = MAX(val, ctx->max_backref); 854 ctx->max_backref = MAX(val, ctx->max_backref);
847 } else { 855 } else {
848 /* extension: accept unknown escaped char 856 /* extension: accept unknown escaped char
849 as a literal */ 857 as a literal */
850 goto parse_literal; 858 goto parse_literal;
851 } 859 }
852 ctx->position++;
853 } 860 }
854 s++; 861 s++;
855 break; 862 break;
856 case '.': 863 case '.':
857 if (ctx->cflags & REG_NEWLINE) { 864 if (ctx->cflags & REG_NEWLINE) {
858 tre_ast_node_t *tmp1, *tmp2; 865 tre_ast_node_t *tmp1, *tmp2;
859 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos ition++); 866 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos ition++);
860 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA X, ctx->position++); 867 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA X, ctx->position++);
861 if (tmp1 && tmp2) 868 if (tmp1 && tmp2)
862 node = tre_ast_new_union(ctx->mem, tmp1, tmp2); 869 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
(...skipping 12 matching lines...) Expand all
875 s++; 882 s++;
876 break; 883 break;
877 case '$': 884 case '$':
878 /* '$' is special everywhere in EREs, and in the end of the stri ng in BREs. */ 885 /* '$' is special everywhere in EREs, and in the end of the stri ng in BREs. */
879 if (!ere && s[1]) 886 if (!ere && s[1])
880 goto parse_literal; 887 goto parse_literal;
881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, - 1); 888 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, - 1);
882 s++; 889 s++;
883 break; 890 break;
884 case '*': 891 case '*':
885 » case '|': 892 » » return REG_BADPAT;
886 case '{': 893 case '{':
887 case '+': 894 case '+':
888 case '?': 895 case '?':
896 /* reject repetitions after empty expression in ERE */
897 if (ere)
898 return REG_BADRPT;
899 case '|':
889 if (!ere) 900 if (!ere)
890 goto parse_literal; 901 goto parse_literal;
891 case 0: 902 case 0:
892 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); 903 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
893 break; 904 break;
894 default: 905 default:
895 parse_literal: 906 parse_literal:
896 len = mbtowc(&wc, s, -1); 907 len = mbtowc(&wc, s, -1);
897 if (len < 0) 908 if (len < 0)
898 return REG_BADPAT; 909 return REG_BADPAT;
899 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(w c))) { 910 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(w c))) {
900 tre_ast_node_t *tmp1, *tmp2; 911 tre_ast_node_t *tmp1, *tmp2;
901 /* multiple opposite case characters are not supported * / 912 /* multiple opposite case characters are not supported * /
902 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr e_toupper(wc), ctx->position); 913 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr e_toupper(wc), ctx->position);
903 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr e_tolower(wc), ctx->position); 914 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr e_tolower(wc), ctx->position);
904 if (tmp1 && tmp2) 915 if (tmp1 && tmp2)
905 node = tre_ast_new_union(ctx->mem, tmp1, tmp2); 916 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
906 else 917 else
907 node = 0; 918 node = 0;
908 } else { 919 } else {
909 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi on); 920 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi on);
910 } 921 }
911 ctx->position++; 922 ctx->position++;
912 s += len; 923 s += len;
913 break; 924 break;
914 } 925 }
926 end:
915 if (!node) 927 if (!node)
916 return REG_ESPACE; 928 return REG_ESPACE;
917 ctx->n = node; 929 ctx->n = node;
918 ctx->s = s; 930 ctx->s = s;
919 return REG_OK; 931 return REG_OK;
920 } 932 }
921 933
922 #define PUSHPTR(err, s, v) do { \ 934 #define PUSHPTR(err, s, v) do { \
923 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \ 935 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
924 return err; \ 936 return err; \
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
959 if (!ctx->n) 971 if (!ctx->n)
960 return REG_ESPACE; 972 return REG_ESPACE;
961 } else { 973 } else {
962 err = parse_atom(ctx, s); 974 err = parse_atom(ctx, s);
963 if (err != REG_OK) 975 if (err != REG_OK)
964 return err; 976 return err;
965 s = ctx->s; 977 s = ctx->s;
966 } 978 }
967 979
968 parse_iter: 980 parse_iter:
969 » » /* extension: repetitions are accepted after an empty node 981 » » /* extension: repetitions are rejected after an empty node
970 » » eg. (+), ^*, a$?, a|{2} */ 982 » » eg. (+), |*, {2}, but assertions are not treated as empty
971 » » switch (*s) { 983 » » so ^* or $? are accepted currently. */
972 » » case '+': 984 » » for (;;) {
973 » » case '?': 985 » » » int min, max;
974 » » » if (!ere) 986
987 » » » if (*s!='\\' && *s!='*') {
988 » » » » if (!ere)
989 » » » » » break;
990 » » » » if (*s!='+' && *s!='?' && *s!='{')
991 » » » » » break;
992 » » » }
993 » » » if (*s=='\\' && ere)
975 break; 994 break;
976 » » » /* fallthrough */ 995 » » » /* extension: treat \+, \? as repetitions in BRE */
977 » » case '*':; 996 » » » if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
978 » » » int min=0, max=-1; 997 » » » » break;
979 » » » if (*s == '+') 998 » » » if (*s=='\\')
980 » » » » min = 1; 999 » » » » s++;
981 » » » if (*s == '?') 1000
982 » » » » max = 1;
983 » » » s++;
984 » » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0) ;
985 » » » if (!ctx->n)
986 » » » » return REG_ESPACE;
987 /* extension: multiple consecutive *+?{,} is unspecified , 1001 /* extension: multiple consecutive *+?{,} is unspecified ,
988 but (a+)+ has to be supported so accepting a++ makes 1002 but (a+)+ has to be supported so accepting a++ makes
989 sense, note however that the RE_DUP_MAX limit can be 1003 sense, note however that the RE_DUP_MAX limit can be
990 circumvented: (a{255}){255} uses a lot of memory.. */ 1004 circumvented: (a{255}){255} uses a lot of memory.. */
991 » » » goto parse_iter; 1005 » » » if (*s=='{') {
992 » » case '\\': 1006 » » » » s = parse_dup(s+1, ere, &min, &max);
993 » » » if (ere || s[1] != '{') 1007 » » » » if (!s)
994 » » » » break; 1008 » » » » » return REG_BADBR;
995 » » » s++; 1009 » » » } else {
996 » » » goto parse_brace; 1010 » » » » min=0;
997 » » case '{': 1011 » » » » max=-1;
998 » » » if (!ere) 1012 » » » » if (*s == '+')
999 » » » » break; 1013 » » » » » min = 1;
1000 » » parse_brace: 1014 » » » » if (*s == '?')
1001 » » » err = parse_dup(ctx, s+1); 1015 » » » » » max = 1;
1002 » » » if (err != REG_OK) 1016 » » » » s++;
1003 » » » » return err; 1017 » » » }
1004 » » » s = ctx->s; 1018 » » » if (max == 0)
1005 » » » goto parse_iter; 1019 » » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1 , -1);
1020 » » » else
1021 » » » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1022 » » » if (!ctx->n)
1023 » » » » return REG_ESPACE;
1006 } 1024 }
1007 1025
1008 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); 1026 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1009 if ((ere && *s == '|') || 1027 if ((ere && *s == '|') ||
1010 (ere && *s == ')' && depth) || 1028 (ere && *s == ')' && depth) ||
1011 (!ere && *s == '\\' && s[1] == ')') || 1029 (!ere && *s == '\\' && s[1] == ')') ||
1030 /* extension: treat \| as alternation in BRE */
1031 (!ere && *s == '\\' && s[1] == '|') ||
1012 !*s) { 1032 !*s) {
1013 /* extension: empty branch is unspecified (), (|a), (a|) 1033 /* extension: empty branch is unspecified (), (|a), (a|)
1014 here they are not rejected but match on empty string */ 1034 here they are not rejected but match on empty string */
1015 int c = *s; 1035 int c = *s;
1016 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch); 1036 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1017 nbranch = 0; 1037 nbranch = 0;
1018 » » » if (c != '|') { 1038
1039 » » » if (c == '\\' && s[1] == '|') {
1040 » » » » s+=2;
1041 » » » } else if (c == '|') {
1042 » » » » s++;
1043 » » » } else {
1019 if (c == '\\') { 1044 if (c == '\\') {
1020 if (!depth) return REG_EPAREN; 1045 if (!depth) return REG_EPAREN;
1021 s+=2; 1046 s+=2;
1022 } else if (c == ')') 1047 } else if (c == ')')
1023 s++; 1048 s++;
1024 depth--; 1049 depth--;
1025 err = marksub(ctx, nunion, tre_stack_pop_int(sta ck)); 1050 err = marksub(ctx, nunion, tre_stack_pop_int(sta ck));
1026 if (err != REG_OK) 1051 if (err != REG_OK)
1027 return err; 1052 return err;
1028 if (!c && depth<0) { 1053 if (!c && depth<0) {
1029 ctx->submatch_id = subid; 1054 ctx->submatch_id = subid;
1030 return REG_OK; 1055 return REG_OK;
1031 } 1056 }
1032 if (!c || depth<0) 1057 if (!c || depth<0)
1033 return REG_EPAREN; 1058 return REG_EPAREN;
1034 nbranch = tre_stack_pop_voidptr(stack); 1059 nbranch = tre_stack_pop_voidptr(stack);
1035 nunion = tre_stack_pop_voidptr(stack); 1060 nunion = tre_stack_pop_voidptr(stack);
1036 goto parse_iter; 1061 goto parse_iter;
1037 } 1062 }
1038 s++;
1039 } 1063 }
1040 } 1064 }
1041 } 1065 }
1042 1066
1043 1067
1044 /*********************************************************************** 1068 /***********************************************************************
1045 from tre-compile.c 1069 from tre-compile.c
1046 ***********************************************************************/ 1070 ***********************************************************************/
1047 1071
1048 1072
(...skipping 1608 matching lines...) Expand 10 before | Expand all | Expand 10 after
2657 tre_submatch_data_t *submatch_data; 2681 tre_submatch_data_t *submatch_data;
2658 tre_tag_direction_t *tag_directions = NULL; 2682 tre_tag_direction_t *tag_directions = NULL;
2659 reg_errcode_t errcode; 2683 reg_errcode_t errcode;
2660 tre_mem_t mem; 2684 tre_mem_t mem;
2661 2685
2662 /* Parse context. */ 2686 /* Parse context. */
2663 tre_parse_ctx_t parse_ctx; 2687 tre_parse_ctx_t parse_ctx;
2664 2688
2665 /* Allocate a stack used throughout the compilation process for various 2689 /* Allocate a stack used throughout the compilation process for various
2666 purposes. */ 2690 purposes. */
2667 stack = tre_stack_new(512, 10240, 128); 2691 stack = tre_stack_new(512, 1024000, 128);
2668 if (!stack) 2692 if (!stack)
2669 return REG_ESPACE; 2693 return REG_ESPACE;
2670 /* Allocate a fast memory allocator. */ 2694 /* Allocate a fast memory allocator. */
2671 mem = tre_mem_new(); 2695 mem = tre_mem_new();
2672 if (!mem) 2696 if (!mem)
2673 { 2697 {
2674 tre_stack_destroy(stack); 2698 tre_stack_destroy(stack);
2675 return REG_ESPACE; 2699 return REG_ESPACE;
2676 } 2700 }
2677 2701
(...skipping 223 matching lines...) Expand 10 before | Expand all | Expand 10 after
2901 } 2925 }
2902 2926
2903 if (tnfa->tag_directions) 2927 if (tnfa->tag_directions)
2904 xfree(tnfa->tag_directions); 2928 xfree(tnfa->tag_directions);
2905 if (tnfa->firstpos_chars) 2929 if (tnfa->firstpos_chars)
2906 xfree(tnfa->firstpos_chars); 2930 xfree(tnfa->firstpos_chars);
2907 if (tnfa->minimal_tags) 2931 if (tnfa->minimal_tags)
2908 xfree(tnfa->minimal_tags); 2932 xfree(tnfa->minimal_tags);
2909 xfree(tnfa); 2933 xfree(tnfa);
2910 } 2934 }
OLDNEW
« no previous file with comments | « fusl/src/network/resolvconf.c ('k') | fusl/src/stdio/fread.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698