| Index: third_party/re2/re2/testing/exhaustive_tester.cc
|
| diff --git a/third_party/re2/re2/testing/exhaustive_tester.cc b/third_party/re2/re2/testing/exhaustive_tester.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..54de85748e7b9b484ae48dc53600465deb7d4bcb
|
| --- /dev/null
|
| +++ b/third_party/re2/re2/testing/exhaustive_tester.cc
|
| @@ -0,0 +1,188 @@
|
| +// Copyright 2008 The RE2 Authors. All Rights Reserved.
|
| +// Use of this source code is governed by a BSD-style
|
| +// license that can be found in the LICENSE file.
|
| +
|
| +// Exhaustive testing of regular expression matching.
|
| +
|
| +// Each test picks an alphabet (e.g., "abc"), a maximum string length,
|
| +// a maximum regular expression length, and a maximum number of letters
|
| +// that can appear in the regular expression. Given these parameters,
|
| +// it tries every possible regular expression and string, verifying that
|
| +// the NFA, DFA, and a trivial backtracking implementation agree about
|
| +// the location of the match.
|
| +
|
| +#include <stdlib.h>
|
| +#include <stdio.h>
|
| +
|
| +#ifndef LOGGING
|
| +#define LOGGING 0
|
| +#endif
|
| +
|
| +#include "util/test.h"
|
| +#include "re2/testing/exhaustive_tester.h"
|
| +#include "re2/testing/tester.h"
|
| +
|
| +DEFINE_bool(show_regexps, false, "show regexps during testing");
|
| +
|
| +DEFINE_int32(max_bad_regexp_inputs, 1,
|
| + "Stop testing a regular expression after finding this many "
|
| + "strings that break it.");
|
| +
|
| +// Compiled in debug mode, the usual tests run for over an hour.
|
| +// Have to cut it down to make the unit test machines happy.
|
| +DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
|
| +
|
| +namespace re2 {
|
| +
|
| +static char* escape(const StringPiece& sp) {
|
| + static char buf[512];
|
| + char* p = buf;
|
| + *p++ = '\"';
|
| + for (int i = 0; i < sp.size(); i++) {
|
| + if(p+5 >= buf+sizeof buf)
|
| + LOG(FATAL) << "ExhaustiveTester escape: too long";
|
| + if(sp[i] == '\\' || sp[i] == '\"') {
|
| + *p++ = '\\';
|
| + *p++ = sp[i];
|
| + } else if(sp[i] == '\n') {
|
| + *p++ = '\\';
|
| + *p++ = 'n';
|
| + } else {
|
| + *p++ = sp[i];
|
| + }
|
| + }
|
| + *p++ = '\"';
|
| + *p = '\0';
|
| + return buf;
|
| +}
|
| +
|
| +static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
|
| + if (!re.Match(input, 0, input.size(), anchor, m, n)) {
|
| + printf("-");
|
| + return;
|
| + }
|
| + for (int i = 0; i < n; i++) {
|
| + if (i > 0)
|
| + printf(" ");
|
| + if (m[i].begin() == NULL)
|
| + printf("-");
|
| + else
|
| + printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
|
| + }
|
| +}
|
| +
|
| +// Processes a single generated regexp.
|
| +// Compiles it using Regexp interface and PCRE, and then
|
| +// checks that NFA, DFA, and PCRE all return the same results.
|
| +void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
|
| + regexps_++;
|
| + string regexp = const_regexp;
|
| + if (!topwrapper_.empty())
|
| + regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
|
| +
|
| + if (FLAGS_show_regexps) {
|
| + printf("\r%s", regexp.c_str());
|
| + fflush(stdout);
|
| + }
|
| +
|
| + if (LOGGING) {
|
| + // Write out test cases and answers for use in testing
|
| + // other implementations, such as Go's regexp package.
|
| + if (randomstrings_)
|
| + LOG(ERROR) << "Cannot log with random strings.";
|
| + if (regexps_ == 1) { // first
|
| + printf("strings\n");
|
| + strgen_.Reset();
|
| + while (strgen_.HasNext())
|
| + printf("%s\n", escape(strgen_.Next()));
|
| + printf("regexps\n");
|
| + }
|
| + printf("%s\n", escape(regexp));
|
| +
|
| + RE2 re(regexp);
|
| + RE2::Options longest;
|
| + longest.set_longest_match(true);
|
| + RE2 relongest(regexp, longest);
|
| + int ngroup = re.NumberOfCapturingGroups()+1;
|
| + StringPiece* group = new StringPiece[ngroup];
|
| +
|
| + strgen_.Reset();
|
| + while (strgen_.HasNext()) {
|
| + StringPiece input = strgen_.Next();
|
| + PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
|
| + printf(";");
|
| + PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
|
| + printf(";");
|
| + PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
|
| + printf(";");
|
| + PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
|
| + printf("\n");
|
| + }
|
| + delete[] group;
|
| + return;
|
| + }
|
| +
|
| + Tester tester(regexp);
|
| + if (tester.error())
|
| + return;
|
| +
|
| + strgen_.Reset();
|
| + strgen_.GenerateNULL();
|
| + if (randomstrings_)
|
| + strgen_.Random(stringseed_, stringcount_);
|
| + int bad_inputs = 0;
|
| + while (strgen_.HasNext()) {
|
| + tests_++;
|
| + if (!tester.TestInput(strgen_.Next())) {
|
| + failures_++;
|
| + if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
|
| + break;
|
| + }
|
| + }
|
| +}
|
| +
|
| +// Runs an exhaustive test on the given parameters.
|
| +void ExhaustiveTest(int maxatoms, int maxops,
|
| + const vector<string>& alphabet,
|
| + const vector<string>& ops,
|
| + int maxstrlen, const vector<string>& stralphabet,
|
| + const string& wrapper,
|
| + const string& topwrapper) {
|
| + if (DEBUG_MODE && FLAGS_quick_debug_mode) {
|
| + if (maxatoms > 1)
|
| + maxatoms--;
|
| + if (maxops > 1)
|
| + maxops--;
|
| + if (maxstrlen > 1)
|
| + maxstrlen--;
|
| + }
|
| + ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
|
| + maxstrlen, stralphabet, wrapper,
|
| + topwrapper);
|
| + t.Generate();
|
| + if (!LOGGING) {
|
| + printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
|
| + t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
|
| + }
|
| + EXPECT_EQ(0, t.failures());
|
| +}
|
| +
|
| +// Runs an exhaustive test using the given parameters and
|
| +// the basic egrep operators.
|
| +void EgrepTest(int maxatoms, int maxops, const string& alphabet,
|
| + int maxstrlen, const string& stralphabet,
|
| + const string& wrapper) {
|
| + const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
|
| +
|
| + for (int i = 0; i < arraysize(tops); i++) {
|
| + ExhaustiveTest(maxatoms, maxops,
|
| + Split("", alphabet),
|
| + RegexpGenerator::EgrepOps(),
|
| + maxstrlen,
|
| + Split("", stralphabet),
|
| + wrapper,
|
| + tops[i]);
|
| + }
|
| +}
|
| +
|
| +} // namespace re2
|
|
|