OLD | NEW |
| (Empty) |
1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 | |
5 #include <vector> | |
6 #include "util/test.h" | |
7 #include "re2/prog.h" | |
8 #include "re2/re2.h" | |
9 #include "re2/regexp.h" | |
10 #include "re2/testing/exhaustive_tester.h" | |
11 #include "re2/testing/regexp_generator.h" | |
12 #include "re2/testing/string_generator.h" | |
13 | |
14 namespace re2 { | |
15 | |
16 // Test that C++ strings are compared as uint8s, not int8s. | |
17 // PossibleMatchRange doesn't depend on this, but callers probably will. | |
18 TEST(CplusplusStrings, EightBit) { | |
19 string s = "\x70"; | |
20 string t = "\xA0"; | |
21 EXPECT_LT(s, t); | |
22 } | |
23 | |
24 struct PrefixTest { | |
25 const char* regexp; | |
26 int maxlen; | |
27 const char* min; | |
28 const char* max; | |
29 }; | |
30 | |
31 static PrefixTest tests[] = { | |
32 { "", 10, "", "", }, | |
33 { "Abcdef", 10, "Abcdef", "Abcdef" }, | |
34 { "abc(def|ghi)", 10, "abcdef", "abcghi" }, | |
35 { "a+hello", 10, "aa", "ahello" }, | |
36 { "a*hello", 10, "a", "hello" }, | |
37 { "def|abc", 10, "abc", "def" }, | |
38 { "a(b)(c)[d]", 10, "abcd", "abcd" }, | |
39 { "ab(cab|cat)", 10, "abcab", "abcat" }, | |
40 { "ab(cab|ca)x", 10, "abcabx", "abcax" }, | |
41 { "(ab|x)(c|de)", 10, "abc", "xde" }, | |
42 { "(ab|x)?(c|z)?", 10, "", "z" }, | |
43 { "[^\\s\\S]", 10, "", "" }, | |
44 { "(abc)+", 5, "abc", "abcac" }, | |
45 { "(abc)+", 2, "ab", "ac" }, | |
46 { "(abc)+", 1, "a", "b" }, | |
47 { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, | |
48 { "a*", 10, "", "ab" }, | |
49 | |
50 { "(?i)Abcdef", 10, "ABCDEF", "abcdef" }, | |
51 { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" }, | |
52 { "(?i)a+hello", 10, "AA", "ahello" }, | |
53 { "(?i)a*hello", 10, "A", "hello" }, | |
54 { "(?i)def|abc", 10, "ABC", "def" }, | |
55 { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" }, | |
56 { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" }, | |
57 { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" }, | |
58 { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" }, | |
59 { "(?i)(ab|x)?(c|z)?", 10, "", "z" }, | |
60 { "(?i)[^\\s\\S]", 10, "", "" }, | |
61 { "(?i)(abc)+", 5, "ABC", "abcac" }, | |
62 { "(?i)(abc)+", 2, "AB", "ac" }, | |
63 { "(?i)(abc)+", 1, "A", "b" }, | |
64 { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, | |
65 { "(?i)a*", 10, "", "ab" }, | |
66 { "(?i)A*", 10, "", "ab" }, | |
67 | |
68 { "\\AAbcdef", 10, "Abcdef", "Abcdef" }, | |
69 { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" }, | |
70 { "\\Aa+hello", 10, "aa", "ahello" }, | |
71 { "\\Aa*hello", 10, "a", "hello" }, | |
72 { "\\Adef|abc", 10, "abc", "def" }, | |
73 { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" }, | |
74 { "\\Aab(cab|cat)", 10, "abcab", "abcat" }, | |
75 { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" }, | |
76 { "\\A(ab|x)(c|de)", 10, "abc", "xde" }, | |
77 { "\\A(ab|x)?(c|z)?", 10, "", "z" }, | |
78 { "\\A[^\\s\\S]", 10, "", "" }, | |
79 { "\\A(abc)+", 5, "abc", "abcac" }, | |
80 { "\\A(abc)+", 2, "ab", "ac" }, | |
81 { "\\A(abc)+", 1, "a", "b" }, | |
82 { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, | |
83 { "\\Aa*", 10, "", "ab" }, | |
84 | |
85 { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" }, | |
86 { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" }, | |
87 { "(?i)\\Aa+hello", 10, "AA", "ahello" }, | |
88 { "(?i)\\Aa*hello", 10, "A", "hello" }, | |
89 { "(?i)\\Adef|abc", 10, "ABC", "def" }, | |
90 { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" }, | |
91 { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" }, | |
92 { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" }, | |
93 { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" }, | |
94 { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" }, | |
95 { "(?i)\\A[^\\s\\S]", 10, "", "" }, | |
96 { "(?i)\\A(abc)+", 5, "ABC", "abcac" }, | |
97 { "(?i)\\A(abc)+", 2, "AB", "ac" }, | |
98 { "(?i)\\A(abc)+", 1, "A", "b" }, | |
99 { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, | |
100 { "(?i)\\Aa*", 10, "", "ab" }, | |
101 { "(?i)\\AA*", 10, "", "ab" }, | |
102 }; | |
103 | |
104 TEST(PossibleMatchRange, HandWritten) { | |
105 for (int i = 0; i < arraysize(tests); i++) { | |
106 for (int j = 0; j < 2; j++) { | |
107 const PrefixTest& t = tests[i]; | |
108 string min, max; | |
109 if (j == 0) { | |
110 LOG(INFO) << "Checking regexp=" << CEscape(t.regexp); | |
111 Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); | |
112 CHECK(re); | |
113 Prog* prog = re->CompileToProg(0); | |
114 CHECK(prog); | |
115 CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen)) | |
116 << " " << t.regexp; | |
117 delete prog; | |
118 re->Decref(); | |
119 } else { | |
120 CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen)); | |
121 } | |
122 EXPECT_EQ(t.min, min) << t.regexp; | |
123 EXPECT_EQ(t.max, max) << t.regexp; | |
124 } | |
125 } | |
126 } | |
127 | |
128 // Test cases where PossibleMatchRange should return false. | |
129 TEST(PossibleMatchRange, Failures) { | |
130 string min, max; | |
131 | |
132 // Fails because no room to write max. | |
133 EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0)); | |
134 | |
135 // Fails because there is no max -- any non-empty string matches | |
136 // or begins a match. Have to use Latin-1 input, because there | |
137 // are no valid UTF-8 strings beginning with byte 0xFF. | |
138 EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1). | |
139 PossibleMatchRange(&min, &max, 10)) | |
140 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
141 EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1). | |
142 PossibleMatchRange(&min, &max, 10)) | |
143 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
144 EXPECT_FALSE(RE2(".+hello", RE2::Latin1). | |
145 PossibleMatchRange(&min, &max, 10)) | |
146 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
147 EXPECT_FALSE(RE2(".*hello", RE2::Latin1). | |
148 PossibleMatchRange(&min, &max, 10)) | |
149 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
150 EXPECT_FALSE(RE2(".*", RE2::Latin1). | |
151 PossibleMatchRange(&min, &max, 10)) | |
152 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
153 EXPECT_FALSE(RE2("\\C*"). | |
154 PossibleMatchRange(&min, &max, 10)) | |
155 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
156 | |
157 // Fails because it's a malformed regexp. | |
158 EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10)) | |
159 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
160 } | |
161 | |
162 // Exhaustive test: generate all regexps within parameters, | |
163 // then generate all strings of a given length over a given alphabet, | |
164 // then check that the prefix information agrees with whether | |
165 // the regexp matches each of the strings. | |
166 class PossibleMatchTester : public RegexpGenerator { | |
167 public: | |
168 PossibleMatchTester(int maxatoms, | |
169 int maxops, | |
170 const vector<string>& alphabet, | |
171 const vector<string>& ops, | |
172 int maxstrlen, | |
173 const vector<string>& stralphabet) | |
174 : RegexpGenerator(maxatoms, maxops, alphabet, ops), | |
175 strgen_(maxstrlen, stralphabet), | |
176 regexps_(0), tests_(0) { } | |
177 | |
178 int regexps() { return regexps_; } | |
179 int tests() { return tests_; } | |
180 | |
181 // Needed for RegexpGenerator interface. | |
182 void HandleRegexp(const string& regexp); | |
183 | |
184 private: | |
185 StringGenerator strgen_; | |
186 | |
187 int regexps_; // Number of HandleRegexp calls | |
188 int tests_; // Number of regexp tests. | |
189 | |
190 DISALLOW_COPY_AND_ASSIGN(PossibleMatchTester); | |
191 }; | |
192 | |
193 // Processes a single generated regexp. | |
194 // Checks that all accepted strings agree with the prefix range. | |
195 void PossibleMatchTester::HandleRegexp(const string& regexp) { | |
196 regexps_++; | |
197 | |
198 VLOG(3) << CEscape(regexp); | |
199 | |
200 RE2 re(regexp, RE2::Latin1); | |
201 CHECK_EQ(re.error(), ""); | |
202 | |
203 string min, max; | |
204 if(!re.PossibleMatchRange(&min, &max, 10)) { | |
205 // There's no good max for "\\C*". Can't use strcmp | |
206 // because sometimes it gets embedded in more | |
207 // complicated expressions. | |
208 if(strstr(regexp.c_str(), "\\C*")) | |
209 return; | |
210 LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp); | |
211 } | |
212 | |
213 strgen_.Reset(); | |
214 while (strgen_.HasNext()) { | |
215 const StringPiece& s = strgen_.Next(); | |
216 tests_++; | |
217 if (!RE2::FullMatch(s, re)) | |
218 continue; | |
219 CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max; | |
220 CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min; | |
221 } | |
222 } | |
223 | |
224 TEST(PossibleMatchRange, Exhaustive) { | |
225 int natom = 3; | |
226 int noperator = 3; | |
227 int stringlen = 5; | |
228 if (RE2_DEBUG_MODE) { | |
229 natom = 2; | |
230 noperator = 3; | |
231 stringlen = 3; | |
232 } | |
233 PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"), | |
234 RegexpGenerator::EgrepOps(), | |
235 stringlen, Explode("ab4")); | |
236 t.Generate(); | |
237 LOG(INFO) << t.regexps() << " regexps, " | |
238 << t.tests() << " tests"; | |
239 } | |
240 | |
241 } // namespace re2 | |
OLD | NEW |