fusl/src/regex/regcomp.c - Issue 1689833004: [fusl] Update fusl

Side by Side Diff: fusl/src/regex/regcomp.c

Issue 1689833004: [fusl] Update fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: remove stray space Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 regcomp.c - TRE POSIX compatible regex compilation functions.	2 regcomp.c - TRE POSIX compatible regex compilation functions.

3	3

4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>	4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>

5 All rights reserved.	5 All rights reserved.

6	6

7 Redistribution and use in source and binary forms, with or without	7 Redistribution and use in source and binary forms, with or without

8 modification, are permitted provided that the following conditions	8 modification, are permitted provided that the following conditions

9 are met:	9 are met:

10	10

(...skipping 690 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
701 *n = 0;	701 *n = 0;

702 for (;;) {	702 for (;;) {

703 n = 10 n + (s - '0');	703 n = 10 n + (s - '0');

704 s++;	704 s++;

705 if (!isdigit(s) \|\| n > RE_DUP_MAX)	705 if (!isdigit(s) \|\| n > RE_DUP_MAX)

706 break;	706 break;

707 }	707 }

708 return s;	708 return s;

709 }	709 }

710	710

711 static reg_errcode_t parse_dup(tre_parse_ctx_t ctx, const char s)	711 static const char parse_dup(const char s, int ere, int pmin, int pmax)

712 {	712 {

713 int min, max;	713 int min, max;

714	714

715 s = parse_dup_count(s, &min);	715 s = parse_dup_count(s, &min);

716 if (*s == ',')	716 if (*s == ',')

717 s = parse_dup_count(s+1, &max);	717 s = parse_dup_count(s+1, &max);

718 else	718 else

719 max = min;	719 max = min;

720	720

721 if (	721 if (

722 (max < min && max >= 0) \|\|	722 (max < min && max >= 0) \|\|

723 max > RE_DUP_MAX \|\|	723 max > RE_DUP_MAX \|\|

724 min > RE_DUP_MAX \|\|	724 min > RE_DUP_MAX \|\|

725 min < 0 \|\|	725 min < 0 \|\|

726 » » (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') \|\|	726 » » (!ere && *s++ != '\\') \|\|

727 *s++ != '}'	727 *s++ != '}'

728 )	728 )

729 » » return REG_BADBR;	729 » » return 0;

730	730 » *pmin = min;

731 » if (min == 0 && max == 0)	731 » *pmax = max;

732 » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);	732 » return s;

733 » else

734 » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);

735 » if (!ctx->n)

736 » » return REG_ESPACE;

737 » ctx->s = s;

738 » return REG_OK;

739 }	733 }

740	734

741 static int hexval(unsigned c)	735 static int hexval(unsigned c)

742 {	736 {

743 if (c-'0'<10) return c-'0';	737 if (c-'0'<10) return c-'0';

744 c \|= 32;	738 c \|= 32;

745 if (c-'a'<6) return c-'a'+10;	739 if (c-'a'<6) return c-'a'+10;

746 return -1;	740 return -1;

747 }	741 }

748	742

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
827 c = hexval(s[i]);	821 c = hexval(s[i]);

828 if (c < 0) break;	822 if (c < 0) break;

829 v = 16*v + c;	823 v = 16*v + c;

830 }	824 }

831 s += i;	825 s += i;

832 if (len == 8) {	826 if (len == 8) {

833 if (*s != '}')	827 if (*s != '}')

834 return REG_EBRACE;	828 return REG_EBRACE;

835 s++;	829 s++;

836 }	830 }

837 » » » node = tre_ast_new_literal(ctx->mem, v, v, ctx->position );	831 » » » node = tre_ast_new_literal(ctx->mem, v, v, ctx->position ++);

838 » » » ctx->position++;

839 s--;	832 s--;

840 break;	833 break;

	834 case '{':

	835 case '+':

	836 case '?':

	837 /* extension: treat \+, \? as repetitions in BRE */

	838 /* reject repetitions after empty expression in BRE */

	839 if (!ere)

	840 return REG_BADRPT;

	841 case '\|':

	842 /* extension: treat \\| as alternation in BRE */

	843 if (!ere) {

	844 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

	845 s--;

	846 goto end;

	847 }

	848 /* fallthrough */

841 default:	849 default:

842 if (!ere && (unsigned)*s-'1' < 9) {	850 if (!ere && (unsigned)*s-'1' < 9) {

843 /* back reference */	851 /* back reference */

844 int val = *s - '0';	852 int val = *s - '0';

845 » » » » node = tre_ast_new_literal(ctx->mem, BACKREF, va l, ctx->position);	853 » » » » node = tre_ast_new_literal(ctx->mem, BACKREF, va l, ctx->position++);

846 ctx->max_backref = MAX(val, ctx->max_backref);	854 ctx->max_backref = MAX(val, ctx->max_backref);

847 } else {	855 } else {

848 /* extension: accept unknown escaped char	856 /* extension: accept unknown escaped char

849 as a literal */	857 as a literal */

850 goto parse_literal;	858 goto parse_literal;

851 }	859 }

852 ctx->position++;

853 }	860 }

854 s++;	861 s++;

855 break;	862 break;

856 case '.':	863 case '.':

857 if (ctx->cflags & REG_NEWLINE) {	864 if (ctx->cflags & REG_NEWLINE) {

858 tre_ast_node_t tmp1, tmp2;	865 tre_ast_node_t tmp1, tmp2;

859 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos ition++);	866 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->pos ition++);

860 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA X, ctx->position++);	867 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MA X, ctx->position++);

861 if (tmp1 && tmp2)	868 if (tmp1 && tmp2)

862 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);	869 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);

(...skipping 12 matching lines...) Expand all Loading...
875 s++;	882 s++;

876 break;	883 break;

877 case '$':	884 case '$':

878 /* '$' is special everywhere in EREs, and in the end of the stri ng in BREs. */	885 /* '$' is special everywhere in EREs, and in the end of the stri ng in BREs. */

879 if (!ere && s[1])	886 if (!ere && s[1])

880 goto parse_literal;	887 goto parse_literal;

881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, - 1);	888 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, - 1);

882 s++;	889 s++;

883 break;	890 break;

884 case '*':	891 case '*':

885 » case '\|':	892 » » return REG_BADPAT;

886 case '{':	893 case '{':

887 case '+':	894 case '+':

888 case '?':	895 case '?':

	896 /* reject repetitions after empty expression in ERE */

	897 if (ere)

	898 return REG_BADRPT;

	899 case '\|':

889 if (!ere)	900 if (!ere)

890 goto parse_literal;	901 goto parse_literal;

891 case 0:	902 case 0:

892 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);	903 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);

893 break;	904 break;

894 default:	905 default:

895 parse_literal:	906 parse_literal:

896 len = mbtowc(&wc, s, -1);	907 len = mbtowc(&wc, s, -1);

897 if (len < 0)	908 if (len < 0)

898 return REG_BADPAT;	909 return REG_BADPAT;

899 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) \|\| tre_islower(w c))) {	910 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) \|\| tre_islower(w c))) {

900 tre_ast_node_t tmp1, tmp2;	911 tre_ast_node_t tmp1, tmp2;

901 /* multiple opposite case characters are not supported * /	912 /* multiple opposite case characters are not supported * /

902 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr e_toupper(wc), ctx->position);	913 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tr e_toupper(wc), ctx->position);

903 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr e_tolower(wc), ctx->position);	914 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tr e_tolower(wc), ctx->position);

904 if (tmp1 && tmp2)	915 if (tmp1 && tmp2)

905 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);	916 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);

906 else	917 else

907 node = 0;	918 node = 0;

908 } else {	919 } else {

909 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi on);	920 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->positi on);

910 }	921 }

911 ctx->position++;	922 ctx->position++;

912 s += len;	923 s += len;

913 break;	924 break;

914 }	925 }

	926 end:

915 if (!node)	927 if (!node)

916 return REG_ESPACE;	928 return REG_ESPACE;

917 ctx->n = node;	929 ctx->n = node;

918 ctx->s = s;	930 ctx->s = s;

919 return REG_OK;	931 return REG_OK;

920 }	932 }

921	933

922 #define PUSHPTR(err, s, v) do { \	934 #define PUSHPTR(err, s, v) do { \

923 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \	935 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \

924 return err; \	936 return err; \

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
959 if (!ctx->n)	971 if (!ctx->n)

960 return REG_ESPACE;	972 return REG_ESPACE;

961 } else {	973 } else {

962 err = parse_atom(ctx, s);	974 err = parse_atom(ctx, s);

963 if (err != REG_OK)	975 if (err != REG_OK)

964 return err;	976 return err;

965 s = ctx->s;	977 s = ctx->s;

966 }	978 }

967	979

968 parse_iter:	980 parse_iter:

969 » » /* extension: repetitions are accepted after an empty node	981 » » /* extension: repetitions are rejected after an empty node

970 » » eg. (+), ^, a$?, a\|{2} /	982 » » eg. (+), \|*, {2}, but assertions are not treated as empty

971 » » switch (*s) {	983 » » so ^* or $? are accepted currently. */

972 » » case '+':	984 » » for (;;) {

973 » » case '?':	985 » » » int min, max;

974 » » » if (!ere)	986

	987 » » » if (s!='\\' && s!='*') {

	988 » » » » if (!ere)

	989 » » » » » break;

	990 » » » » if (s!='+' && s!='?' && *s!='{')

	991 » » » » » break;

	992 » » » }

	993 » » » if (*s=='\\' && ere)

975 break;	994 break;

976 » » » /* fallthrough */	995 » » » /* extension: treat \+, \? as repetitions in BRE */

977 » » case '*':;	996 » » » if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')

978 » » » int min=0, max=-1;	997 » » » » break;

979 » » » if (*s == '+')	998 » » » if (*s=='\\')

980 » » » » min = 1;	999 » » » » s++;

981 » » » if (*s == '?')	1000

982 » » » » max = 1;

983 » » » s++;

984 » » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0) ;

985 » » » if (!ctx->n)

986 » » » » return REG_ESPACE;

987 /* extension: multiple consecutive *+?{,} is unspecified ,	1001 /* extension: multiple consecutive *+?{,} is unspecified ,

988 but (a+)+ has to be supported so accepting a++ makes	1002 but (a+)+ has to be supported so accepting a++ makes

989 sense, note however that the RE_DUP_MAX limit can be	1003 sense, note however that the RE_DUP_MAX limit can be

990 circumvented: (a{255}){255} uses a lot of memory.. */	1004 circumvented: (a{255}){255} uses a lot of memory.. */

991 » » » goto parse_iter;	1005 » » » if (*s=='{') {

992 » » case '\\':	1006 » » » » s = parse_dup(s+1, ere, &min, &max);

993 » » » if (ere \|\| s[1] != '{')	1007 » » » » if (!s)

994 » » » » break;	1008 » » » » » return REG_BADBR;

995 » » » s++;	1009 » » » } else {

996 » » » goto parse_brace;	1010 » » » » min=0;

997 » » case '{':	1011 » » » » max=-1;

998 » » » if (!ere)	1012 » » » » if (*s == '+')

999 » » » » break;	1013 » » » » » min = 1;

1000 » » parse_brace:	1014 » » » » if (*s == '?')

1001 » » » err = parse_dup(ctx, s+1);	1015 » » » » » max = 1;

1002 » » » if (err != REG_OK)	1016 » » » » s++;

1003 » » » » return err;	1017 » » » }

1004 » » » s = ctx->s;	1018 » » » if (max == 0)

1005 » » » goto parse_iter;	1019 » » » » ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1 , -1);

	1020 » » » else

	1021 » » » » ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);

	1022 » » » if (!ctx->n)

	1023 » » » » return REG_ESPACE;

1006 }	1024 }

1007	1025

1008 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);	1026 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);

1009 if ((ere && *s == '\|') \|\|	1027 if ((ere && *s == '\|') \|\|

1010 (ere && *s == ')' && depth) \|\|	1028 (ere && *s == ')' && depth) \|\|

1011 (!ere && *s == '\\' && s[1] == ')') \|\|	1029 (!ere && *s == '\\' && s[1] == ')') \|\|

	1030 /* extension: treat \\| as alternation in BRE */

	1031 (!ere && *s == '\\' && s[1] == '\|') \|\|

1012 !*s) {	1032 !*s) {

1013 /* extension: empty branch is unspecified (), (\|a), (a\|)	1033 /* extension: empty branch is unspecified (), (\|a), (a\|)

1014 here they are not rejected but match on empty string */	1034 here they are not rejected but match on empty string */

1015 int c = *s;	1035 int c = *s;

1016 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);	1036 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);

1017 nbranch = 0;	1037 nbranch = 0;

1018 » » » if (c != '\|') {	1038

	1039 » » » if (c == '\\' && s[1] == '\|') {

	1040 » » » » s+=2;

	1041 » » » } else if (c == '\|') {

	1042 » » » » s++;

	1043 » » » } else {

1019 if (c == '\\') {	1044 if (c == '\\') {

1020 if (!depth) return REG_EPAREN;	1045 if (!depth) return REG_EPAREN;

1021 s+=2;	1046 s+=2;

1022 } else if (c == ')')	1047 } else if (c == ')')

1023 s++;	1048 s++;

1024 depth--;	1049 depth--;

1025 err = marksub(ctx, nunion, tre_stack_pop_int(sta ck));	1050 err = marksub(ctx, nunion, tre_stack_pop_int(sta ck));

1026 if (err != REG_OK)	1051 if (err != REG_OK)

1027 return err;	1052 return err;

1028 if (!c && depth<0) {	1053 if (!c && depth<0) {

1029 ctx->submatch_id = subid;	1054 ctx->submatch_id = subid;

1030 return REG_OK;	1055 return REG_OK;

1031 }	1056 }

1032 if (!c \|\| depth<0)	1057 if (!c \|\| depth<0)

1033 return REG_EPAREN;	1058 return REG_EPAREN;

1034 nbranch = tre_stack_pop_voidptr(stack);	1059 nbranch = tre_stack_pop_voidptr(stack);

1035 nunion = tre_stack_pop_voidptr(stack);	1060 nunion = tre_stack_pop_voidptr(stack);

1036 goto parse_iter;	1061 goto parse_iter;

1037 }	1062 }

1038 s++;

1039 }	1063 }

1040 }	1064 }

1041 }	1065 }

1042	1066

1043	1067

1044 /***********************************************************************	1068 /***********************************************************************

1045 from tre-compile.c	1069 from tre-compile.c

1046 ***********************************************************************/	1070 ***********************************************************************/

1047	1071

1048	1072

(...skipping 1608 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2657 tre_submatch_data_t *submatch_data;	2681 tre_submatch_data_t *submatch_data;

2658 tre_tag_direction_t *tag_directions = NULL;	2682 tre_tag_direction_t *tag_directions = NULL;

2659 reg_errcode_t errcode;	2683 reg_errcode_t errcode;

2660 tre_mem_t mem;	2684 tre_mem_t mem;

2661	2685

2662 /* Parse context. */	2686 /* Parse context. */

2663 tre_parse_ctx_t parse_ctx;	2687 tre_parse_ctx_t parse_ctx;

2664	2688

2665 /* Allocate a stack used throughout the compilation process for various	2689 /* Allocate a stack used throughout the compilation process for various

2666 purposes. */	2690 purposes. */

2667 stack = tre_stack_new(512, 10240, 128);	2691 stack = tre_stack_new(512, 1024000, 128);

2668 if (!stack)	2692 if (!stack)

2669 return REG_ESPACE;	2693 return REG_ESPACE;

2670 /* Allocate a fast memory allocator. */	2694 /* Allocate a fast memory allocator. */

2671 mem = tre_mem_new();	2695 mem = tre_mem_new();

2672 if (!mem)	2696 if (!mem)

2673 {	2697 {

2674 tre_stack_destroy(stack);	2698 tre_stack_destroy(stack);

2675 return REG_ESPACE;	2699 return REG_ESPACE;

2676 }	2700 }

2677	2701

(...skipping 223 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2901 }	2925 }

2902	2926

2903 if (tnfa->tag_directions)	2927 if (tnfa->tag_directions)

2904 xfree(tnfa->tag_directions);	2928 xfree(tnfa->tag_directions);

2905 if (tnfa->firstpos_chars)	2929 if (tnfa->firstpos_chars)

2906 xfree(tnfa->firstpos_chars);	2930 xfree(tnfa->firstpos_chars);

2907 if (tnfa->minimal_tags)	2931 if (tnfa->minimal_tags)

2908 xfree(tnfa->minimal_tags);	2932 xfree(tnfa->minimal_tags);

2909 xfree(tnfa);	2933 xfree(tnfa);

2910 }	2934 }

OLD	NEW

« no previous file with comments | « fusl/src/network/resolvconf.c ('k') | fusl/src/stdio/fread.c » ('j') | no next file with comments »