| OLD | NEW |
| (Empty) |
| 1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 #include <vector> | |
| 6 #include "util/test.h" | |
| 7 #include "re2/prog.h" | |
| 8 #include "re2/re2.h" | |
| 9 #include "re2/regexp.h" | |
| 10 #include "re2/testing/exhaustive_tester.h" | |
| 11 #include "re2/testing/regexp_generator.h" | |
| 12 #include "re2/testing/string_generator.h" | |
| 13 | |
| 14 namespace re2 { | |
| 15 | |
| 16 // Test that C++ strings are compared as uint8s, not int8s. | |
| 17 // PossibleMatchRange doesn't depend on this, but callers probably will. | |
| 18 TEST(CplusplusStrings, EightBit) { | |
| 19 string s = "\x70"; | |
| 20 string t = "\xA0"; | |
| 21 EXPECT_LT(s, t); | |
| 22 } | |
| 23 | |
| 24 struct PrefixTest { | |
| 25 const char* regexp; | |
| 26 int maxlen; | |
| 27 const char* min; | |
| 28 const char* max; | |
| 29 }; | |
| 30 | |
| 31 static PrefixTest tests[] = { | |
| 32 { "", 10, "", "", }, | |
| 33 { "Abcdef", 10, "Abcdef", "Abcdef" }, | |
| 34 { "abc(def|ghi)", 10, "abcdef", "abcghi" }, | |
| 35 { "a+hello", 10, "aa", "ahello" }, | |
| 36 { "a*hello", 10, "a", "hello" }, | |
| 37 { "def|abc", 10, "abc", "def" }, | |
| 38 { "a(b)(c)[d]", 10, "abcd", "abcd" }, | |
| 39 { "ab(cab|cat)", 10, "abcab", "abcat" }, | |
| 40 { "ab(cab|ca)x", 10, "abcabx", "abcax" }, | |
| 41 { "(ab|x)(c|de)", 10, "abc", "xde" }, | |
| 42 { "(ab|x)?(c|z)?", 10, "", "z" }, | |
| 43 { "[^\\s\\S]", 10, "", "" }, | |
| 44 { "(abc)+", 5, "abc", "abcac" }, | |
| 45 { "(abc)+", 2, "ab", "ac" }, | |
| 46 { "(abc)+", 1, "a", "b" }, | |
| 47 { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, | |
| 48 { "a*", 10, "", "ab" }, | |
| 49 | |
| 50 { "(?i)Abcdef", 10, "ABCDEF", "abcdef" }, | |
| 51 { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" }, | |
| 52 { "(?i)a+hello", 10, "AA", "ahello" }, | |
| 53 { "(?i)a*hello", 10, "A", "hello" }, | |
| 54 { "(?i)def|abc", 10, "ABC", "def" }, | |
| 55 { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" }, | |
| 56 { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" }, | |
| 57 { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" }, | |
| 58 { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" }, | |
| 59 { "(?i)(ab|x)?(c|z)?", 10, "", "z" }, | |
| 60 { "(?i)[^\\s\\S]", 10, "", "" }, | |
| 61 { "(?i)(abc)+", 5, "ABC", "abcac" }, | |
| 62 { "(?i)(abc)+", 2, "AB", "ac" }, | |
| 63 { "(?i)(abc)+", 1, "A", "b" }, | |
| 64 { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, | |
| 65 { "(?i)a*", 10, "", "ab" }, | |
| 66 { "(?i)A*", 10, "", "ab" }, | |
| 67 | |
| 68 { "\\AAbcdef", 10, "Abcdef", "Abcdef" }, | |
| 69 { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" }, | |
| 70 { "\\Aa+hello", 10, "aa", "ahello" }, | |
| 71 { "\\Aa*hello", 10, "a", "hello" }, | |
| 72 { "\\Adef|abc", 10, "abc", "def" }, | |
| 73 { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" }, | |
| 74 { "\\Aab(cab|cat)", 10, "abcab", "abcat" }, | |
| 75 { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" }, | |
| 76 { "\\A(ab|x)(c|de)", 10, "abc", "xde" }, | |
| 77 { "\\A(ab|x)?(c|z)?", 10, "", "z" }, | |
| 78 { "\\A[^\\s\\S]", 10, "", "" }, | |
| 79 { "\\A(abc)+", 5, "abc", "abcac" }, | |
| 80 { "\\A(abc)+", 2, "ab", "ac" }, | |
| 81 { "\\A(abc)+", 1, "a", "b" }, | |
| 82 { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" }, | |
| 83 { "\\Aa*", 10, "", "ab" }, | |
| 84 | |
| 85 { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" }, | |
| 86 { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" }, | |
| 87 { "(?i)\\Aa+hello", 10, "AA", "ahello" }, | |
| 88 { "(?i)\\Aa*hello", 10, "A", "hello" }, | |
| 89 { "(?i)\\Adef|abc", 10, "ABC", "def" }, | |
| 90 { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" }, | |
| 91 { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" }, | |
| 92 { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" }, | |
| 93 { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" }, | |
| 94 { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" }, | |
| 95 { "(?i)\\A[^\\s\\S]", 10, "", "" }, | |
| 96 { "(?i)\\A(abc)+", 5, "ABC", "abcac" }, | |
| 97 { "(?i)\\A(abc)+", 2, "AB", "ac" }, | |
| 98 { "(?i)\\A(abc)+", 1, "A", "b" }, | |
| 99 { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" }, | |
| 100 { "(?i)\\Aa*", 10, "", "ab" }, | |
| 101 { "(?i)\\AA*", 10, "", "ab" }, | |
| 102 }; | |
| 103 | |
| 104 TEST(PossibleMatchRange, HandWritten) { | |
| 105 for (int i = 0; i < arraysize(tests); i++) { | |
| 106 for (int j = 0; j < 2; j++) { | |
| 107 const PrefixTest& t = tests[i]; | |
| 108 string min, max; | |
| 109 if (j == 0) { | |
| 110 LOG(INFO) << "Checking regexp=" << CEscape(t.regexp); | |
| 111 Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); | |
| 112 CHECK(re); | |
| 113 Prog* prog = re->CompileToProg(0); | |
| 114 CHECK(prog); | |
| 115 CHECK(prog->PossibleMatchRange(&min, &max, t.maxlen)) | |
| 116 << " " << t.regexp; | |
| 117 delete prog; | |
| 118 re->Decref(); | |
| 119 } else { | |
| 120 CHECK(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen)); | |
| 121 } | |
| 122 EXPECT_EQ(t.min, min) << t.regexp; | |
| 123 EXPECT_EQ(t.max, max) << t.regexp; | |
| 124 } | |
| 125 } | |
| 126 } | |
| 127 | |
| 128 // Test cases where PossibleMatchRange should return false. | |
| 129 TEST(PossibleMatchRange, Failures) { | |
| 130 string min, max; | |
| 131 | |
| 132 // Fails because no room to write max. | |
| 133 EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0)); | |
| 134 | |
| 135 // Fails because there is no max -- any non-empty string matches | |
| 136 // or begins a match. Have to use Latin-1 input, because there | |
| 137 // are no valid UTF-8 strings beginning with byte 0xFF. | |
| 138 EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1). | |
| 139 PossibleMatchRange(&min, &max, 10)) | |
| 140 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
| 141 EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1). | |
| 142 PossibleMatchRange(&min, &max, 10)) | |
| 143 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
| 144 EXPECT_FALSE(RE2(".+hello", RE2::Latin1). | |
| 145 PossibleMatchRange(&min, &max, 10)) | |
| 146 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
| 147 EXPECT_FALSE(RE2(".*hello", RE2::Latin1). | |
| 148 PossibleMatchRange(&min, &max, 10)) | |
| 149 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
| 150 EXPECT_FALSE(RE2(".*", RE2::Latin1). | |
| 151 PossibleMatchRange(&min, &max, 10)) | |
| 152 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
| 153 EXPECT_FALSE(RE2("\\C*"). | |
| 154 PossibleMatchRange(&min, &max, 10)) | |
| 155 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
| 156 | |
| 157 // Fails because it's a malformed regexp. | |
| 158 EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10)) | |
| 159 << "min=" << CEscape(min) << ", max=" << CEscape(max); | |
| 160 } | |
| 161 | |
| 162 // Exhaustive test: generate all regexps within parameters, | |
| 163 // then generate all strings of a given length over a given alphabet, | |
| 164 // then check that the prefix information agrees with whether | |
| 165 // the regexp matches each of the strings. | |
| 166 class PossibleMatchTester : public RegexpGenerator { | |
| 167 public: | |
| 168 PossibleMatchTester(int maxatoms, | |
| 169 int maxops, | |
| 170 const vector<string>& alphabet, | |
| 171 const vector<string>& ops, | |
| 172 int maxstrlen, | |
| 173 const vector<string>& stralphabet) | |
| 174 : RegexpGenerator(maxatoms, maxops, alphabet, ops), | |
| 175 strgen_(maxstrlen, stralphabet), | |
| 176 regexps_(0), tests_(0) { } | |
| 177 | |
| 178 int regexps() { return regexps_; } | |
| 179 int tests() { return tests_; } | |
| 180 | |
| 181 // Needed for RegexpGenerator interface. | |
| 182 void HandleRegexp(const string& regexp); | |
| 183 | |
| 184 private: | |
| 185 StringGenerator strgen_; | |
| 186 | |
| 187 int regexps_; // Number of HandleRegexp calls | |
| 188 int tests_; // Number of regexp tests. | |
| 189 | |
| 190 DISALLOW_COPY_AND_ASSIGN(PossibleMatchTester); | |
| 191 }; | |
| 192 | |
| 193 // Processes a single generated regexp. | |
| 194 // Checks that all accepted strings agree with the prefix range. | |
| 195 void PossibleMatchTester::HandleRegexp(const string& regexp) { | |
| 196 regexps_++; | |
| 197 | |
| 198 VLOG(3) << CEscape(regexp); | |
| 199 | |
| 200 RE2 re(regexp, RE2::Latin1); | |
| 201 CHECK_EQ(re.error(), ""); | |
| 202 | |
| 203 string min, max; | |
| 204 if(!re.PossibleMatchRange(&min, &max, 10)) { | |
| 205 // There's no good max for "\\C*". Can't use strcmp | |
| 206 // because sometimes it gets embedded in more | |
| 207 // complicated expressions. | |
| 208 if(strstr(regexp.c_str(), "\\C*")) | |
| 209 return; | |
| 210 LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp); | |
| 211 } | |
| 212 | |
| 213 strgen_.Reset(); | |
| 214 while (strgen_.HasNext()) { | |
| 215 const StringPiece& s = strgen_.Next(); | |
| 216 tests_++; | |
| 217 if (!RE2::FullMatch(s, re)) | |
| 218 continue; | |
| 219 CHECK_GE(s, min) << " regexp: " << regexp << " max: " << max; | |
| 220 CHECK_LE(s, max) << " regexp: " << regexp << " min: " << min; | |
| 221 } | |
| 222 } | |
| 223 | |
| 224 TEST(PossibleMatchRange, Exhaustive) { | |
| 225 int natom = 3; | |
| 226 int noperator = 3; | |
| 227 int stringlen = 5; | |
| 228 if (RE2_DEBUG_MODE) { | |
| 229 natom = 2; | |
| 230 noperator = 3; | |
| 231 stringlen = 3; | |
| 232 } | |
| 233 PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"), | |
| 234 RegexpGenerator::EgrepOps(), | |
| 235 stringlen, Explode("ab4")); | |
| 236 t.Generate(); | |
| 237 LOG(INFO) << t.regexps() << " regexps, " | |
| 238 << t.tests() << " tests"; | |
| 239 } | |
| 240 | |
| 241 } // namespace re2 | |
| OLD | NEW |