OLD | NEW |
| (Empty) |
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 // Exhaustive testing of regular expression matching. | |
6 | |
7 // Each test picks an alphabet (e.g., "abc"), a maximum string length, | |
8 // a maximum regular expression length, and a maximum number of letters | |
9 // that can appear in the regular expression. Given these parameters, | |
10 // it tries every possible regular expression and string, verifying that | |
11 // the NFA, DFA, and a trivial backtracking implementation agree about | |
12 // the location of the match. | |
13 | |
14 #include <stdlib.h> | |
15 #include <stdio.h> | |
16 | |
17 #ifndef LOGGING | |
18 #define LOGGING 0 | |
19 #endif | |
20 | |
21 #include "util/test.h" | |
22 #include "re2/testing/exhaustive_tester.h" | |
23 #include "re2/testing/tester.h" | |
24 | |
25 DEFINE_bool(show_regexps, false, "show regexps during testing"); | |
26 | |
27 DEFINE_int32(max_bad_regexp_inputs, 1, | |
28 "Stop testing a regular expression after finding this many " | |
29 "strings that break it."); | |
30 | |
31 // Compiled in debug mode, the usual tests run for over an hour. | |
32 // Have to cut it down to make the unit test machines happy. | |
33 DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode."); | |
34 | |
35 namespace re2 { | |
36 | |
37 static char* escape(const StringPiece& sp) { | |
38 static char buf[512]; | |
39 char* p = buf; | |
40 *p++ = '\"'; | |
41 for (int i = 0; i < sp.size(); i++) { | |
42 if(p+5 >= buf+sizeof buf) | |
43 LOG(FATAL) << "ExhaustiveTester escape: too long"; | |
44 if(sp[i] == '\\' || sp[i] == '\"') { | |
45 *p++ = '\\'; | |
46 *p++ = sp[i]; | |
47 } else if(sp[i] == '\n') { | |
48 *p++ = '\\'; | |
49 *p++ = 'n'; | |
50 } else { | |
51 *p++ = sp[i]; | |
52 } | |
53 } | |
54 *p++ = '\"'; | |
55 *p = '\0'; | |
56 return buf; | |
57 } | |
58 | |
59 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
hor, StringPiece *m, int n) { | |
60 if (!re.Match(input, 0, input.size(), anchor, m, n)) { | |
61 printf("-"); | |
62 return; | |
63 } | |
64 for (int i = 0; i < n; i++) { | |
65 if (i > 0) | |
66 printf(" "); | |
67 if (m[i].begin() == NULL) | |
68 printf("-"); | |
69 else | |
70 printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cas
t<int>(m[i].end() - input.begin())); | |
71 } | |
72 } | |
73 | |
74 // Processes a single generated regexp. | |
75 // Compiles it using Regexp interface and PCRE, and then | |
76 // checks that NFA, DFA, and PCRE all return the same results. | |
77 void ExhaustiveTester::HandleRegexp(const string& const_regexp) { | |
78 regexps_++; | |
79 string regexp = const_regexp; | |
80 if (!topwrapper_.empty()) | |
81 regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str()); | |
82 | |
83 if (FLAGS_show_regexps) { | |
84 printf("\r%s", regexp.c_str()); | |
85 fflush(stdout); | |
86 } | |
87 | |
88 if (LOGGING) { | |
89 // Write out test cases and answers for use in testing | |
90 // other implementations, such as Go's regexp package. | |
91 if (randomstrings_) | |
92 LOG(ERROR) << "Cannot log with random strings."; | |
93 if (regexps_ == 1) { // first | |
94 printf("strings\n"); | |
95 strgen_.Reset(); | |
96 while (strgen_.HasNext()) | |
97 printf("%s\n", escape(strgen_.Next())); | |
98 printf("regexps\n"); | |
99 } | |
100 printf("%s\n", escape(regexp)); | |
101 | |
102 RE2 re(regexp); | |
103 RE2::Options longest; | |
104 longest.set_longest_match(true); | |
105 RE2 relongest(regexp, longest); | |
106 int ngroup = re.NumberOfCapturingGroups()+1; | |
107 StringPiece* group = new StringPiece[ngroup]; | |
108 | |
109 strgen_.Reset(); | |
110 while (strgen_.HasNext()) { | |
111 StringPiece input = strgen_.Next(); | |
112 PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup); | |
113 printf(";"); | |
114 PrintResult(re, input, RE2::UNANCHORED, group, ngroup); | |
115 printf(";"); | |
116 PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup); | |
117 printf(";"); | |
118 PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup); | |
119 printf("\n"); | |
120 } | |
121 delete[] group; | |
122 return; | |
123 } | |
124 | |
125 Tester tester(regexp); | |
126 if (tester.error()) | |
127 return; | |
128 | |
129 strgen_.Reset(); | |
130 strgen_.GenerateNULL(); | |
131 if (randomstrings_) | |
132 strgen_.Random(stringseed_, stringcount_); | |
133 int bad_inputs = 0; | |
134 while (strgen_.HasNext()) { | |
135 tests_++; | |
136 if (!tester.TestInput(strgen_.Next())) { | |
137 failures_++; | |
138 if (++bad_inputs >= FLAGS_max_bad_regexp_inputs) | |
139 break; | |
140 } | |
141 } | |
142 } | |
143 | |
144 // Runs an exhaustive test on the given parameters. | |
145 void ExhaustiveTest(int maxatoms, int maxops, | |
146 const vector<string>& alphabet, | |
147 const vector<string>& ops, | |
148 int maxstrlen, const vector<string>& stralphabet, | |
149 const string& wrapper, | |
150 const string& topwrapper) { | |
151 if (RE2_DEBUG_MODE && FLAGS_quick_debug_mode) { | |
152 if (maxatoms > 1) | |
153 maxatoms--; | |
154 if (maxops > 1) | |
155 maxops--; | |
156 if (maxstrlen > 1) | |
157 maxstrlen--; | |
158 } | |
159 ExhaustiveTester t(maxatoms, maxops, alphabet, ops, | |
160 maxstrlen, stralphabet, wrapper, | |
161 topwrapper); | |
162 t.Generate(); | |
163 if (!LOGGING) { | |
164 printf("%d regexps, %d tests, %d failures [%d/%d str]\n", | |
165 t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.siz
e()); | |
166 } | |
167 EXPECT_EQ(0, t.failures()); | |
168 } | |
169 | |
170 // Runs an exhaustive test using the given parameters and | |
171 // the basic egrep operators. | |
172 void EgrepTest(int maxatoms, int maxops, const string& alphabet, | |
173 int maxstrlen, const string& stralphabet, | |
174 const string& wrapper) { | |
175 const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" }; | |
176 | |
177 for (int i = 0; i < arraysize(tops); i++) { | |
178 ExhaustiveTest(maxatoms, maxops, | |
179 Split("", alphabet), | |
180 RegexpGenerator::EgrepOps(), | |
181 maxstrlen, | |
182 Split("", stralphabet), | |
183 wrapper, | |
184 tops[i]); | |
185 } | |
186 } | |
187 | |
188 } // namespace re2 | |
OLD | NEW |