| OLD | NEW |
| (Empty) |
| 1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 // Exhaustive testing of regular expression matching. | |
| 6 | |
| 7 // Each test picks an alphabet (e.g., "abc"), a maximum string length, | |
| 8 // a maximum regular expression length, and a maximum number of letters | |
| 9 // that can appear in the regular expression. Given these parameters, | |
| 10 // it tries every possible regular expression and string, verifying that | |
| 11 // the NFA, DFA, and a trivial backtracking implementation agree about | |
| 12 // the location of the match. | |
| 13 | |
| 14 #include <stdlib.h> | |
| 15 #include <stdio.h> | |
| 16 | |
| 17 #ifndef LOGGING | |
| 18 #define LOGGING 0 | |
| 19 #endif | |
| 20 | |
| 21 #include "util/test.h" | |
| 22 #include "re2/testing/exhaustive_tester.h" | |
| 23 #include "re2/testing/tester.h" | |
| 24 | |
| 25 DEFINE_bool(show_regexps, false, "show regexps during testing"); | |
| 26 | |
| 27 DEFINE_int32(max_bad_regexp_inputs, 1, | |
| 28 "Stop testing a regular expression after finding this many " | |
| 29 "strings that break it."); | |
| 30 | |
| 31 // Compiled in debug mode, the usual tests run for over an hour. | |
| 32 // Have to cut it down to make the unit test machines happy. | |
| 33 DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode."); | |
| 34 | |
| 35 namespace re2 { | |
| 36 | |
| 37 static char* escape(const StringPiece& sp) { | |
| 38 static char buf[512]; | |
| 39 char* p = buf; | |
| 40 *p++ = '\"'; | |
| 41 for (int i = 0; i < sp.size(); i++) { | |
| 42 if(p+5 >= buf+sizeof buf) | |
| 43 LOG(FATAL) << "ExhaustiveTester escape: too long"; | |
| 44 if(sp[i] == '\\' || sp[i] == '\"') { | |
| 45 *p++ = '\\'; | |
| 46 *p++ = sp[i]; | |
| 47 } else if(sp[i] == '\n') { | |
| 48 *p++ = '\\'; | |
| 49 *p++ = 'n'; | |
| 50 } else { | |
| 51 *p++ = sp[i]; | |
| 52 } | |
| 53 } | |
| 54 *p++ = '\"'; | |
| 55 *p = '\0'; | |
| 56 return buf; | |
| 57 } | |
| 58 | |
| 59 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anc
hor, StringPiece *m, int n) { | |
| 60 if (!re.Match(input, 0, input.size(), anchor, m, n)) { | |
| 61 printf("-"); | |
| 62 return; | |
| 63 } | |
| 64 for (int i = 0; i < n; i++) { | |
| 65 if (i > 0) | |
| 66 printf(" "); | |
| 67 if (m[i].begin() == NULL) | |
| 68 printf("-"); | |
| 69 else | |
| 70 printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cas
t<int>(m[i].end() - input.begin())); | |
| 71 } | |
| 72 } | |
| 73 | |
| 74 // Processes a single generated regexp. | |
| 75 // Compiles it using Regexp interface and PCRE, and then | |
| 76 // checks that NFA, DFA, and PCRE all return the same results. | |
| 77 void ExhaustiveTester::HandleRegexp(const string& const_regexp) { | |
| 78 regexps_++; | |
| 79 string regexp = const_regexp; | |
| 80 if (!topwrapper_.empty()) | |
| 81 regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str()); | |
| 82 | |
| 83 if (FLAGS_show_regexps) { | |
| 84 printf("\r%s", regexp.c_str()); | |
| 85 fflush(stdout); | |
| 86 } | |
| 87 | |
| 88 if (LOGGING) { | |
| 89 // Write out test cases and answers for use in testing | |
| 90 // other implementations, such as Go's regexp package. | |
| 91 if (randomstrings_) | |
| 92 LOG(ERROR) << "Cannot log with random strings."; | |
| 93 if (regexps_ == 1) { // first | |
| 94 printf("strings\n"); | |
| 95 strgen_.Reset(); | |
| 96 while (strgen_.HasNext()) | |
| 97 printf("%s\n", escape(strgen_.Next())); | |
| 98 printf("regexps\n"); | |
| 99 } | |
| 100 printf("%s\n", escape(regexp)); | |
| 101 | |
| 102 RE2 re(regexp); | |
| 103 RE2::Options longest; | |
| 104 longest.set_longest_match(true); | |
| 105 RE2 relongest(regexp, longest); | |
| 106 int ngroup = re.NumberOfCapturingGroups()+1; | |
| 107 StringPiece* group = new StringPiece[ngroup]; | |
| 108 | |
| 109 strgen_.Reset(); | |
| 110 while (strgen_.HasNext()) { | |
| 111 StringPiece input = strgen_.Next(); | |
| 112 PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup); | |
| 113 printf(";"); | |
| 114 PrintResult(re, input, RE2::UNANCHORED, group, ngroup); | |
| 115 printf(";"); | |
| 116 PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup); | |
| 117 printf(";"); | |
| 118 PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup); | |
| 119 printf("\n"); | |
| 120 } | |
| 121 delete[] group; | |
| 122 return; | |
| 123 } | |
| 124 | |
| 125 Tester tester(regexp); | |
| 126 if (tester.error()) | |
| 127 return; | |
| 128 | |
| 129 strgen_.Reset(); | |
| 130 strgen_.GenerateNULL(); | |
| 131 if (randomstrings_) | |
| 132 strgen_.Random(stringseed_, stringcount_); | |
| 133 int bad_inputs = 0; | |
| 134 while (strgen_.HasNext()) { | |
| 135 tests_++; | |
| 136 if (!tester.TestInput(strgen_.Next())) { | |
| 137 failures_++; | |
| 138 if (++bad_inputs >= FLAGS_max_bad_regexp_inputs) | |
| 139 break; | |
| 140 } | |
| 141 } | |
| 142 } | |
| 143 | |
| 144 // Runs an exhaustive test on the given parameters. | |
| 145 void ExhaustiveTest(int maxatoms, int maxops, | |
| 146 const vector<string>& alphabet, | |
| 147 const vector<string>& ops, | |
| 148 int maxstrlen, const vector<string>& stralphabet, | |
| 149 const string& wrapper, | |
| 150 const string& topwrapper) { | |
| 151 if (RE2_DEBUG_MODE && FLAGS_quick_debug_mode) { | |
| 152 if (maxatoms > 1) | |
| 153 maxatoms--; | |
| 154 if (maxops > 1) | |
| 155 maxops--; | |
| 156 if (maxstrlen > 1) | |
| 157 maxstrlen--; | |
| 158 } | |
| 159 ExhaustiveTester t(maxatoms, maxops, alphabet, ops, | |
| 160 maxstrlen, stralphabet, wrapper, | |
| 161 topwrapper); | |
| 162 t.Generate(); | |
| 163 if (!LOGGING) { | |
| 164 printf("%d regexps, %d tests, %d failures [%d/%d str]\n", | |
| 165 t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.siz
e()); | |
| 166 } | |
| 167 EXPECT_EQ(0, t.failures()); | |
| 168 } | |
| 169 | |
| 170 // Runs an exhaustive test using the given parameters and | |
| 171 // the basic egrep operators. | |
| 172 void EgrepTest(int maxatoms, int maxops, const string& alphabet, | |
| 173 int maxstrlen, const string& stralphabet, | |
| 174 const string& wrapper) { | |
| 175 const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" }; | |
| 176 | |
| 177 for (int i = 0; i < arraysize(tops); i++) { | |
| 178 ExhaustiveTest(maxatoms, maxops, | |
| 179 Split("", alphabet), | |
| 180 RegexpGenerator::EgrepOps(), | |
| 181 maxstrlen, | |
| 182 Split("", stralphabet), | |
| 183 wrapper, | |
| 184 tops[i]); | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 } // namespace re2 | |
| OLD | NEW |