Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(27)

Side by Side Diff: third_party/re2/re2/testing/possible_match_test.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/re2/re2/testing/parse_test.cc ('k') | third_party/re2/re2/testing/random_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include <vector>
6 #include "util/test.h"
7 #include "re2/prog.h"
8 #include "re2/re2.h"
9 #include "re2/regexp.h"
10 #include "re2/testing/exhaustive_tester.h"
11 #include "re2/testing/regexp_generator.h"
12 #include "re2/testing/string_generator.h"
13
14 namespace re2 {
15
16 // Test that C++ strings are compared as uint8s, not int8s.
17 // PossibleMatchRange doesn't depend on this, but callers probably will.
18 TEST(CplusplusStrings, EightBit) {
19 string s = "\x70";
20 string t = "\xA0";
21 EXPECT_LT(s, t);
22 }
23
24 struct PrefixTest {
25 const char* regexp;
26 int maxlen;
27 const char* min;
28 const char* max;
29 };
30
31 static PrefixTest tests[] = {
32 { "", 10, "", "", },
33 { "Abcdef", 10, "Abcdef", "Abcdef" },
34 { "abc(def|ghi)", 10, "abcdef", "abcghi" },
35 { "a+hello", 10, "aa", "ahello" },
36 { "a*hello", 10, "a", "hello" },
37 { "def|abc", 10, "abc", "def" },
38 { "a(b)(c)[d]", 10, "abcd", "abcd" },
39 { "ab(cab|cat)", 10, "abcab", "abcat" },
40 { "ab(cab|ca)x", 10, "abcabx", "abcax" },
41 { "(ab|x)(c|de)", 10, "abc", "xde" },
42 { "(ab|x)?(c|z)?", 10, "", "z" },
43 { "[^\\s\\S]", 10, "", "" },
44 { "(abc)+", 5, "abc", "abcac" },
45 { "(abc)+", 2, "ab", "ac" },
46 { "(abc)+", 1, "a", "b" },
47 { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
48 { "a*", 10, "", "ab" },
49
50 { "(?i)Abcdef", 10, "ABCDEF", "abcdef" },
51 { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" },
52 { "(?i)a+hello", 10, "AA", "ahello" },
53 { "(?i)a*hello", 10, "A", "hello" },
54 { "(?i)def|abc", 10, "ABC", "def" },
55 { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" },
56 { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" },
57 { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" },
58 { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" },
59 { "(?i)(ab|x)?(c|z)?", 10, "", "z" },
60 { "(?i)[^\\s\\S]", 10, "", "" },
61 { "(?i)(abc)+", 5, "ABC", "abcac" },
62 { "(?i)(abc)+", 2, "AB", "ac" },
63 { "(?i)(abc)+", 1, "A", "b" },
64 { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
65 { "(?i)a*", 10, "", "ab" },
66 { "(?i)A*", 10, "", "ab" },
67
68 { "\\AAbcdef", 10, "Abcdef", "Abcdef" },
69 { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" },
70 { "\\Aa+hello", 10, "aa", "ahello" },
71 { "\\Aa*hello", 10, "a", "hello" },
72 { "\\Adef|abc", 10, "abc", "def" },
73 { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" },
74 { "\\Aab(cab|cat)", 10, "abcab", "abcat" },
75 { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" },
76 { "\\A(ab|x)(c|de)", 10, "abc", "xde" },
77 { "\\A(ab|x)?(c|z)?", 10, "", "z" },
78 { "\\A[^\\s\\S]", 10, "", "" },
79 { "\\A(abc)+", 5, "abc", "abcac" },
80 { "\\A(abc)+", 2, "ab", "ac" },
81 { "\\A(abc)+", 1, "a", "b" },
82 { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
83 { "\\Aa*", 10, "", "ab" },
84
85 { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" },
86 { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" },
87 { "(?i)\\Aa+hello", 10, "AA", "ahello" },
88 { "(?i)\\Aa*hello", 10, "A", "hello" },
89 { "(?i)\\Adef|abc", 10, "ABC", "def" },
90 { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" },
91 { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" },
92 { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" },
93 { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" },
94 { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" },
95 { "(?i)\\A[^\\s\\S]", 10, "", "" },
96 { "(?i)\\A(abc)+", 5, "ABC", "abcac" },
97 { "(?i)\\A(abc)+", 2, "AB", "ac" },
98 { "(?i)\\A(abc)+", 1, "A", "b" },
99 { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
100 { "(?i)\\Aa*", 10, "", "ab" },
101 { "(?i)\\AA*", 10, "", "ab" },
102 };
103
104 TEST(PossibleMatchRange, HandWritten) {
105 for (int i = 0; i < arraysize(tests); i++) {
106 for (int j = 0; j < 2; j++) {
107 const PrefixTest& t = tests[i];
108 string min, max;
109 if (j == 0) {
110 LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
111 Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
112 CHECK(re);
113 Prog* prog = re->CompileToProg(0);
114 CHECK(prog);
115 CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen))
116 << " " << t.regexp;
117 delete prog;
118 re->Decref();
119 } else {
120 CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
121 }
122 EXPECT_EQ(t.min, min) << t.regexp;
123 EXPECT_EQ(t.max, max) << t.regexp;
124 }
125 }
126 }
127
128 // Test cases where PossibleMatchRange should return false.
129 TEST(PossibleMatchRange, Failures) {
130 string min, max;
131
132 // Fails because no room to write max.
133 EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
134
135 // Fails because there is no max -- any non-empty string matches
136 // or begins a match. Have to use Latin-1 input, because there
137 // are no valid UTF-8 strings beginning with byte 0xFF.
138 EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
139 PossibleMatchRange(&min, &max, 10))
140 << "min=" << CEscape(min) << ", max=" << CEscape(max);
141 EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
142 PossibleMatchRange(&min, &max, 10))
143 << "min=" << CEscape(min) << ", max=" << CEscape(max);
144 EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
145 PossibleMatchRange(&min, &max, 10))
146 << "min=" << CEscape(min) << ", max=" << CEscape(max);
147 EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
148 PossibleMatchRange(&min, &max, 10))
149 << "min=" << CEscape(min) << ", max=" << CEscape(max);
150 EXPECT_FALSE(RE2(".*", RE2::Latin1).
151 PossibleMatchRange(&min, &max, 10))
152 << "min=" << CEscape(min) << ", max=" << CEscape(max);
153 EXPECT_FALSE(RE2("\\C*").
154 PossibleMatchRange(&min, &max, 10))
155 << "min=" << CEscape(min) << ", max=" << CEscape(max);
156
157 // Fails because it's a malformed regexp.
158 EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
159 << "min=" << CEscape(min) << ", max=" << CEscape(max);
160 }
161
162 // Exhaustive test: generate all regexps within parameters,
163 // then generate all strings of a given length over a given alphabet,
164 // then check that the prefix information agrees with whether
165 // the regexp matches each of the strings.
166 class PossibleMatchTester : public RegexpGenerator {
167 public:
168 PossibleMatchTester(int maxatoms,
169 int maxops,
170 const vector<string>& alphabet,
171 const vector<string>& ops,
172 int maxstrlen,
173 const vector<string>& stralphabet)
174 : RegexpGenerator(maxatoms, maxops, alphabet, ops),
175 strgen_(maxstrlen, stralphabet),
176 regexps_(0), tests_(0) { }
177
178 int regexps() { return regexps_; }
179 int tests() { return tests_; }
180
181 // Needed for RegexpGenerator interface.
182 void HandleRegexp(const string& regexp);
183
184 private:
185 StringGenerator strgen_;
186
187 int regexps_; // Number of HandleRegexp calls
188 int tests_; // Number of regexp tests.
189
190 DISALLOW_COPY_AND_ASSIGN(PossibleMatchTester);
191 };
192
193 // Processes a single generated regexp.
194 // Checks that all accepted strings agree with the prefix range.
195 void PossibleMatchTester::HandleRegexp(const string& regexp) {
196 regexps_++;
197
198 VLOG(3) << CEscape(regexp);
199
200 RE2 re(regexp, RE2::Latin1);
201 CHECK_EQ(re.error(), "");
202
203 string min, max;
204 if(!re.PossibleMatchRange(&min, &max, 10)) {
205 // There's no good max for "\\C*". Can't use strcmp
206 // because sometimes it gets embedded in more
207 // complicated expressions.
208 if(strstr(regexp.c_str(), "\\C*"))
209 return;
210 LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
211 }
212
213 strgen_.Reset();
214 while (strgen_.HasNext()) {
215 const StringPiece& s = strgen_.Next();
216 tests_++;
217 if (!RE2::FullMatch(s, re))
218 continue;
219 CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max;
220 CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min;
221 }
222 }
223
224 TEST(PossibleMatchRange, Exhaustive) {
225 int natom = 3;
226 int noperator = 3;
227 int stringlen = 5;
228 if (RE2_DEBUG_MODE) {
229 natom = 2;
230 noperator = 3;
231 stringlen = 3;
232 }
233 PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
234 RegexpGenerator::EgrepOps(),
235 stringlen, Explode("ab4"));
236 t.Generate();
237 LOG(INFO) << t.regexps() << " regexps, "
238 << t.tests() << " tests";
239 }
240
241 } // namespace re2
OLDNEW
« no previous file with comments | « third_party/re2/re2/testing/parse_test.cc ('k') | third_party/re2/re2/testing/random_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698