| Index: third_party/re2/re2/testing/filtered_re2_test.cc
|
| diff --git a/third_party/re2/re2/testing/filtered_re2_test.cc b/third_party/re2/re2/testing/filtered_re2_test.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..7755d30622e29ac7e65f6244a45c4c11eb37918c
|
| --- /dev/null
|
| +++ b/third_party/re2/re2/testing/filtered_re2_test.cc
|
| @@ -0,0 +1,258 @@
|
| +// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
| +// Use of this source code is governed by a BSD-style
|
| +// license that can be found in the LICENSE file.
|
| +
|
| +#include "util/test.h"
|
| +#include "re2/filtered_re2.h"
|
| +#include "re2/re2.h"
|
| +
|
| +DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc
|
| +
|
| +namespace re2 {
|
| +
|
| +// Bundles the state shared by the tests in this file: the FilteredRE2
|
| +// under test together with the options and vectors handed to its
|
| +// Add/Compile/AllMatches calls.
|
| +struct FilterTestVars {
|
| + // Filled in by f.Compile().
|
| + vector<string> atoms;
|
| + // Passed to f.AllMatches() as the atom-index argument.
|
| + vector<int> atom_indices;
|
| + // Output argument of f.AllMatches().
|
| + vector<int> matches;
|
| + // Options passed to every f.Add() call.
|
| + RE2::Options opts;
|
| + // The filter being exercised.
|
| + FilteredRE2 f;
|
| +};
|
| +
|
| +TEST(FilteredRE2Test, EmptyTest) {
|
| +  // No regexp is added and Compile() is never called; querying the filter
|
| +  // in that state is expected to report no matches.
|
| +  FilterTestVars vars;
|
| +  vars.f.AllMatches("foo", vars.atom_indices, &vars.matches);
|
| +  EXPECT_EQ(0, vars.matches.size());
|
| +}
|
| +
|
| +TEST(FilteredRE2Test, SmallOrTest) {
|
| +  // Both alternatives of the regexp below are three characters long,
|
| +  // i.e. shorter than this minimum, so no atoms are expected.
|
| +  FLAGS_filtered_re2_min_atom_len = 4;
|
| +
|
| +  FilterTestVars vars;
|
| +  int regexp_id;
|
| +  vars.f.Add("(foo|bar)", vars.opts, &regexp_id);
|
| +
|
| +  vars.f.Compile(&vars.atoms);
|
| +  EXPECT_EQ(0, vars.atoms.size());
|
| +
|
| +  // Even with no atoms to filter on, the regexp must be reported for a
|
| +  // text that it matches.
|
| +  vars.f.AllMatches("lemurs bar", vars.atom_indices, &vars.matches);
|
| +  EXPECT_EQ(1, vars.matches.size());
|
| +  EXPECT_EQ(regexp_id, vars.matches[0]);
|
| +}
|
| +
|
| +// One table-driven case: a name, the regexps to add to a FilteredRE2, and
|
| +// the atoms Compile() is expected to produce for them. Array slots that
|
| +// an initializer leaves unset are NULL; the tests count entries up to the
|
| +// first NULL.
|
| +struct AtomTest {
|
| + const char* testname;
|
| + // If any test needs more than this many regexps or atoms, increase
|
| + // the size of the corresponding array.
|
| + const char* regexps[20];
|
| + const char* atoms[20];
|
| +};
|
| +
|
| +// Cases run by the AtomTests test; expected atoms are compared without
|
| +// regard to order. MatchEmptyPattern and MatchTests pick entries 0 and 2
|
| +// by index (and verify the name), so keep the order of entries stable.
|
| +AtomTest atom_tests[] = {
|
| + {
|
| + // This test checks to make sure empty patterns are allowed.
|
| + "CheckEmptyPattern",
|
| + {""},
|
| + {}
|
| + }, {
|
| + // This test checks that all atoms whose length is at least the
|
| + // minimum atom length are found, and that no shorter atoms are found.
|
| + "AllAtomsGtMinLengthFound", {
|
| + "(abc123|def456|ghi789).*mnop[x-z]+",
|
| + "abc..yyy..zz",
|
| + "mnmnpp[a-z]+PPP"
|
| + }, {
|
| + "abc123",
|
| + "def456",
|
| + "ghi789",
|
| + "mnop",
|
| + "abc",
|
| + "yyy",
|
| + "mnmnpp",
|
| + "ppp"
|
| + }
|
| + }, {
|
| + // Test to make sure that any atoms that have another atom as a
|
| + // substring in an OR are removed; that is, only the shortest
|
| + // substring is kept.
|
| + "SubstrAtomRemovesSuperStrInOr", {
|
| + "(abc123|abc|ghi789|abc1234).*[x-z]+",
|
| + "abcd..yyy..yyyzzz",
|
| + "mnmnpp[a-z]+PPP"
|
| + }, {
|
| + "abc",
|
| + "ghi789",
|
| + "abcd",
|
| + "yyy",
|
| + "yyyzzz",
|
| + "mnmnpp",
|
| + "ppp"
|
| + }
|
| + }, {
|
| + // Test character class expansion.
|
| + "CharClassExpansion", {
|
| + "m[a-c][d-f]n.*[x-z]+",
|
| + "[x-y]bcde[ab]"
|
| + }, {
|
| + "madn", "maen", "mafn",
|
| + "mbdn", "mben", "mbfn",
|
| + "mcdn", "mcen", "mcfn",
|
| + "xbcdea", "xbcdeb",
|
| + "ybcdea", "ybcdeb"
|
| + }
|
| + }, {
|
| + // Test upper/lower of non-ASCII.
|
| + "UnicodeLower", {
|
| + "(?i)ΔδΠϖπΣςσ",
|
| + "ΛΜΝΟΠ",
|
| + "ψρστυ",
|
| + }, {
|
| + "δδπππσσσ",
|
| + "λμνοπ",
|
| + "ψρστυ",
|
| + },
|
| + },
|
| +};
|
| +
|
| +// Adds the first n entries of regexps to v->f using v->opts, then compiles
|
| +// the filter, storing the resulting atoms in v->atoms.
|
| +void AddRegexpsAndCompile(const char* regexps[],
|
| +                          int n,
|
| +                          struct FilterTestVars* v) {
|
| +  // Add() requires an output slot for the assigned id; the value is not
|
| +  // used here.
|
| +  int ignored_id;
|
| +  int next = 0;
|
| +  while (next < n) {
|
| +    v->f.Add(regexps[next], v->opts, &ignored_id);
|
| +    ++next;
|
| +  }
|
| +  v->f.Compile(&v->atoms);
|
| +}
|
| +
|
| +// Compares v->atoms against the first n entries of atoms, ignoring order.
|
| +// Sorts v->atoms in place as a side effect. On a mismatch, logs testname
|
| +// and both lists at WARNING level and returns false; otherwise returns
|
| +// true.
|
| +bool CheckExpectedAtoms(const char* atoms[],
|
| +                        int n,
|
| +                        const char* testname,
|
| +                        struct FilterTestVars* v) {
|
| +  vector<string> expected;
|
| +  for (int k = 0; k < n; k++)
|
| +    expected.push_back(atoms[k]);
|
| +
|
| +  vector<string>& found = v->atoms;
|
| +  sort(found.begin(), found.end());
|
| +  sort(expected.begin(), expected.end());
|
| +
|
| +  // With both lists sorted, vector equality checks the sizes and then
|
| +  // every element pairwise.
|
| +  if (expected == found)
|
| +    return true;
|
| +
|
| +  LOG(WARNING) << "Failed " << testname;
|
| +  LOG(WARNING) << "Expected #atoms = " << expected.size();
|
| +  for (size_t k = 0; k < expected.size(); k++)
|
| +    LOG(WARNING) << expected[k];
|
| +  LOG(WARNING) << "Found #atoms = " << found.size();
|
| +  for (size_t k = 0; k < found.size(); k++)
|
| +    LOG(WARNING) << found[k];
|
| +  return false;
|
| +}
|
| +
|
| +TEST(FilteredRE2Test, AtomTests) {
|
| +  FLAGS_filtered_re2_min_atom_len = 3;
|
| +
|
| +  // Run every table entry and count the failing ones, so that a single
|
| +  // run reports all mismatches rather than stopping at the first.
|
| +  int failures = 0;
|
| +  for (int index = 0; index < arraysize(atom_tests); index++) {
|
| +    AtomTest& test = atom_tests[index];
|
| +
|
| +    // Both arrays are NULL-terminated unless completely full.
|
| +    int num_regexps = 0;
|
| +    while (num_regexps < arraysize(test.regexps) &&
|
| +           test.regexps[num_regexps] != NULL)
|
| +      num_regexps++;
|
| +    int num_atoms = 0;
|
| +    while (num_atoms < arraysize(test.atoms) &&
|
| +           test.atoms[num_atoms] != NULL)
|
| +      num_atoms++;
|
| +
|
| +    FilterTestVars vars;
|
| +    AddRegexpsAndCompile(test.regexps, num_regexps, &vars);
|
| +    const bool ok =
|
| +        CheckExpectedAtoms(test.atoms, num_atoms, test.testname, &vars);
|
| +    if (!ok)
|
| +      failures++;
|
| +  }
|
| +  EXPECT_EQ(0, failures);
|
| +}
|
| +
|
| +// Looks up each string of matched_atoms in atoms and appends the index of
|
| +// its first occurrence to *atom_indices, which is cleared first. Every
|
| +// matched atom is expected to be present in atoms; a missing one trips
|
| +// the EXPECT_LT below and contributes no index.
|
| +void FindAtomIndices(const vector<string>& atoms,
|
| +                     const vector<string>& matched_atoms,
|
| +                     vector<int>* atom_indices) {
|
| +  atom_indices->clear();
|
| +  for (size_t i = 0; i < matched_atoms.size(); i++) {
|
| +    size_t j = 0;
|
| +    for (; j < atoms.size(); j++) {
|
| +      if (matched_atoms[i] == atoms[j]) {
|
| +        atom_indices->push_back(static_cast<int>(j));
|
| +        break;
|
| +      }
|
| +    }
|
| +    // This check has to follow the search loop: inside the loop the
|
| +    // condition j < atoms.size() always holds, so it could never fail.
|
| +    // Here j == atoms.size() means the atom was not found.
|
| +    EXPECT_LT(j, atoms.size());
|
| +  }
|
| +}
|
| +
|
| +TEST(FilteredRE2Test, MatchEmptyPattern) {
|
| +  FLAGS_filtered_re2_min_atom_len = 3;
|
| +
|
| +  FilterTestVars v;
|
| +  AtomTest* t = &atom_tests[0];
|
| +  // We are using the regexps used in one of the atom tests
|
| +  // for this test. Adding the EXPECT here to make sure
|
| +  // the index we use for the test is for the correct test.
|
| +  EXPECT_EQ("CheckEmptyPattern", string(t->testname));
|
| +  // Count the regexps of the entry: the array is NULL-terminated unless
|
| +  // it is completely full.
|
| +  int nregexp;
|
| +  for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
|
| +    if (t->regexps[nregexp] == NULL)
|
| +      break;
|
| +  AddRegexpsAndCompile(t->regexps, nregexp, &v);
|
| +  string text = "0123";
|
| +  // No atoms are reported as present; the empty pattern is still expected
|
| +  // to be returned as the first match (id 0).
|
| +  vector<int> atom_ids;
|
| +  EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
|
| +}
|
| +
|
| +TEST(FilteredRE2Test, MatchTests) {
|
| +  FLAGS_filtered_re2_min_atom_len = 3;
|
| +
|
| +  FilterTestVars vars;
|
| +  // This test borrows the regexps of one of the atom tests; the name
|
| +  // check guards against the table being reordered.
|
| +  AtomTest& test = atom_tests[2];
|
| +  EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(test.testname));
|
| +  int num_regexps = 0;
|
| +  while (num_regexps < arraysize(test.regexps) &&
|
| +         test.regexps[num_regexps] != NULL)
|
| +    num_regexps++;
|
| +  AddRegexpsAndCompile(test.regexps, num_regexps, &vars);
|
| +
|
| +  // Case 1: the only atom present in the text is "abc".
|
| +  string text = "abc121212xyz";
|
| +  vector<int> atom_ids;
|
| +  vector<string> present_atoms;
|
| +  present_atoms.push_back("abc");
|
| +  FindAtomIndices(vars.atoms, present_atoms, &atom_ids);
|
| +  vector<int> matching_regexps;
|
| +  vars.f.AllMatches(text, atom_ids, &matching_regexps);
|
| +  EXPECT_EQ(1, matching_regexps.size());
|
| +
|
| +  // Case 2: atoms "abc", "yyy" and "yyyzzz" are present.
|
| +  text = "abc12312yyyzzz";
|
| +  present_atoms.clear();
|
| +  present_atoms.push_back("abc");
|
| +  present_atoms.push_back("yyy");
|
| +  present_atoms.push_back("yyyzzz");
|
| +  FindAtomIndices(vars.atoms, present_atoms, &atom_ids);
|
| +  vars.f.AllMatches(text, atom_ids, &matching_regexps);
|
| +  EXPECT_EQ(1, matching_regexps.size());
|
| +
|
| +  // Case 3: "abcd" is present as well, and two regexps are expected to
|
| +  // match.
|
| +  text = "abcd12yyy32yyyzzz";
|
| +  present_atoms.clear();
|
| +  present_atoms.push_back("abc");
|
| +  present_atoms.push_back("abcd");
|
| +  present_atoms.push_back("yyy");
|
| +  present_atoms.push_back("yyyzzz");
|
| +  FindAtomIndices(vars.atoms, present_atoms, &atom_ids);
|
| +  // Log the resolved atom indices to help diagnose a failure.
|
| +  LOG(INFO) << "S: " << atom_ids.size();
|
| +  for (size_t k = 0; k < atom_ids.size(); k++)
|
| +    LOG(INFO) << "i: " << k << " : " << atom_ids[k];
|
| +  vars.f.AllMatches(text, atom_ids, &matching_regexps);
|
| +  EXPECT_EQ(2, matching_regexps.size());
|
| +}
|
| +
|
| +} // namespace re2
|
|
|