| Index: third_party/re2/re2/testing/filtered_re2_test.cc
|
| diff --git a/third_party/re2/re2/testing/filtered_re2_test.cc b/third_party/re2/re2/testing/filtered_re2_test.cc
|
| deleted file mode 100644
|
| index 76c1284d2fc9e5f4b3bec8235004af2ef79e6252..0000000000000000000000000000000000000000
|
| --- a/third_party/re2/re2/testing/filtered_re2_test.cc
|
| +++ /dev/null
|
| @@ -1,273 +0,0 @@
|
| -// Copyright 2009 The RE2 Authors. All Rights Reserved.
|
| -// Use of this source code is governed by a BSD-style
|
| -// license that can be found in the LICENSE file.
|
| -
|
| -#include "util/test.h"
|
| -#include "re2/filtered_re2.h"
|
| -#include "re2/re2.h"
|
| -
|
| -DECLARE_int32(filtered_re2_min_atom_len); // From prefilter_tree.cc
|
| -
|
| -namespace re2 {
|
| -
|
| -struct FilterTestVars {
|
| - vector<string> atoms;
|
| - vector<int> atom_indices;
|
| - vector<int> matches;
|
| - RE2::Options opts;
|
| - FilteredRE2 f;
|
| -};
|
| -
|
| -TEST(FilteredRE2Test, EmptyTest) {
|
| - FilterTestVars v;
|
| - v.f.AllMatches("foo", v.atom_indices, &v.matches);
|
| - EXPECT_EQ(0, v.matches.size());
|
| -}
|
| -
|
| -TEST(FilteredRE2Test, SmallOrTest) {
|
| - FLAGS_filtered_re2_min_atom_len = 4;
|
| -
|
| - FilterTestVars v;
|
| - int id;
|
| - v.f.Add("(foo|bar)", v.opts, &id);
|
| -
|
| - v.f.Compile(&v.atoms);
|
| - EXPECT_EQ(0, v.atoms.size());
|
| -
|
| - v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);
|
| - EXPECT_EQ(1, v.matches.size());
|
| - EXPECT_EQ(id, v.matches[0]);
|
| -}
|
| -
|
| -TEST(FilteredRE2Test, SmallLatinTest) {
|
| - FLAGS_filtered_re2_min_atom_len = 3;
|
| - FilterTestVars v;
|
| - int id;
|
| -
|
| - v.opts.set_encoding(RE2::Options::EncodingLatin1);
|
| - v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id);
|
| - v.f.Compile(&v.atoms);
|
| - EXPECT_EQ(1, v.atoms.size());
|
| - EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef");
|
| -
|
| - v.atom_indices.push_back(0);
|
| - v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches);
|
| - EXPECT_EQ(1, v.matches.size());
|
| - EXPECT_EQ(id, v.matches[0]);
|
| -}
|
| -
|
| -struct AtomTest {
|
| - const char* testname;
|
| - // If any test needs more than this many regexps or atoms, increase
|
| - // the size of the corresponding array.
|
| - const char* regexps[20];
|
| - const char* atoms[20];
|
| -};
|
| -
|
| -AtomTest atom_tests[] = {
|
| - {
|
| - // This test checks to make sure empty patterns are allowed.
|
| - "CheckEmptyPattern",
|
| - {""},
|
| - {}
|
| - }, {
|
| - // This test checks that all atoms of length greater than min length
|
| - // are found, and no atoms that are of smaller length are found.
|
| - "AllAtomsGtMinLengthFound", {
|
| - "(abc123|def456|ghi789).*mnop[x-z]+",
|
| - "abc..yyy..zz",
|
| - "mnmnpp[a-z]+PPP"
|
| - }, {
|
| - "abc123",
|
| - "def456",
|
| - "ghi789",
|
| - "mnop",
|
| - "abc",
|
| - "yyy",
|
| - "mnmnpp",
|
| - "ppp"
|
| - }
|
| - }, {
|
| - // Test to make sure that any atoms that have another atom as a
|
| - // substring in an OR are removed; that is, only the shortest
|
| - // substring is kept.
|
| - "SubstrAtomRemovesSuperStrInOr", {
|
| - "(abc123|abc|ghi789|abc1234).*[x-z]+",
|
| - "abcd..yyy..yyyzzz",
|
| - "mnmnpp[a-z]+PPP"
|
| - }, {
|
| - "abc",
|
| - "ghi789",
|
| - "abcd",
|
| - "yyy",
|
| - "yyyzzz",
|
| - "mnmnpp",
|
| - "ppp"
|
| - }
|
| - }, {
|
| - // Test character class expansion.
|
| - "CharClassExpansion", {
|
| - "m[a-c][d-f]n.*[x-z]+",
|
| - "[x-y]bcde[ab]"
|
| - }, {
|
| - "madn", "maen", "mafn",
|
| - "mbdn", "mben", "mbfn",
|
| - "mcdn", "mcen", "mcfn",
|
| - "xbcdea", "xbcdeb",
|
| - "ybcdea", "ybcdeb"
|
| - }
|
| - }, {
|
| - // Test upper/lower of non-ASCII.
|
| - "UnicodeLower", {
|
| - "(?i)ΔδΠϖπΣςσ",
|
| - "ΛΜΝΟΠ",
|
| - "ψρστυ",
|
| - }, {
|
| - "δδπππσσσ",
|
| - "λμνοπ",
|
| - "ψρστυ",
|
| - },
|
| - },
|
| -};
|
| -
|
| -void AddRegexpsAndCompile(const char* regexps[],
|
| - int n,
|
| - struct FilterTestVars* v) {
|
| - for (int i = 0; i < n; i++) {
|
| - int id;
|
| - v->f.Add(regexps[i], v->opts, &id);
|
| - }
|
| - v->f.Compile(&v->atoms);
|
| -}
|
| -
|
| -bool CheckExpectedAtoms(const char* atoms[],
|
| - int n,
|
| - const char* testname,
|
| - struct FilterTestVars* v) {
|
| - vector<string> expected;
|
| - for (int i = 0; i < n; i++)
|
| - expected.push_back(atoms[i]);
|
| -
|
| - bool pass = expected.size() == v->atoms.size();
|
| -
|
| - sort(v->atoms.begin(), v->atoms.end());
|
| - sort(expected.begin(), expected.end());
|
| - for (int i = 0; pass && i < n; i++)
|
| - pass = pass && expected[i] == v->atoms[i];
|
| -
|
| - if (!pass) {
|
| - LOG(WARNING) << "Failed " << testname;
|
| - LOG(WARNING) << "Expected #atoms = " << expected.size();
|
| - for (size_t i = 0; i < expected.size(); i++)
|
| - LOG(WARNING) << expected[i];
|
| - LOG(WARNING) << "Found #atoms = " << v->atoms.size();
|
| - for (size_t i = 0; i < v->atoms.size(); i++)
|
| - LOG(WARNING) << v->atoms[i];
|
| - }
|
| -
|
| - return pass;
|
| -}
|
| -
|
| -TEST(FilteredRE2Test, AtomTests) {
|
| - FLAGS_filtered_re2_min_atom_len = 3;
|
| -
|
| - int nfail = 0;
|
| - for (int i = 0; i < arraysize(atom_tests); i++) {
|
| - FilterTestVars v;
|
| - AtomTest* t = &atom_tests[i];
|
| - int natom, nregexp;
|
| - for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
|
| - if (t->regexps[nregexp] == NULL)
|
| - break;
|
| - for (natom = 0; natom < arraysize(t->atoms); natom++)
|
| - if (t->atoms[natom] == NULL)
|
| - break;
|
| - AddRegexpsAndCompile(t->regexps, nregexp, &v);
|
| - if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v))
|
| - nfail++;
|
| - }
|
| - EXPECT_EQ(0, nfail);
|
| -}
|
| -
|
| -void FindAtomIndices(const vector<string>& atoms,
|
| - const vector<string>& matched_atoms,
|
| - vector<int>* atom_indices) {
|
| - atom_indices->clear();
|
| - for (size_t i = 0; i < matched_atoms.size(); i++) {
|
| - for (size_t j = 0; j < atoms.size(); j++) {
|
| - if (matched_atoms[i] == atoms[j]) {
|
| - atom_indices->push_back(static_cast<int>(j));
|
| - break;
|
| - }
|
| - }
|
| - }
|
| -}
|
| -
|
| -TEST(FilteredRE2Test, MatchEmptyPattern) {
|
| - FLAGS_filtered_re2_min_atom_len = 3;
|
| -
|
| - FilterTestVars v;
|
| - AtomTest* t = &atom_tests[0];
|
| - // We are using the regexps used in one of the atom tests
|
| - // for this test. Adding the EXPECT here to make sure
|
| - // the index we use for the test is for the correct test.
|
| - EXPECT_EQ("CheckEmptyPattern", string(t->testname));
|
| - int nregexp;
|
| - for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
|
| - if (t->regexps[nregexp] == NULL)
|
| - break;
|
| - AddRegexpsAndCompile(t->regexps, nregexp, &v);
|
| - string text = "0123";
|
| - vector<int> atom_ids;
|
| - vector<int> matching_regexps;
|
| - EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
|
| -}
|
| -
|
| -TEST(FilteredRE2Test, MatchTests) {
|
| - FLAGS_filtered_re2_min_atom_len = 3;
|
| -
|
| - FilterTestVars v;
|
| - AtomTest* t = &atom_tests[2];
|
| - // We are using the regexps used in one of the atom tests
|
| - // for this test.
|
| - EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", string(t->testname));
|
| - int nregexp;
|
| - for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
|
| - if (t->regexps[nregexp] == NULL)
|
| - break;
|
| - AddRegexpsAndCompile(t->regexps, nregexp, &v);
|
| -
|
| - string text = "abc121212xyz";
|
| - // atoms = abc
|
| - vector<int> atom_ids;
|
| - vector<string> atoms;
|
| - atoms.push_back("abc");
|
| - FindAtomIndices(v.atoms, atoms, &atom_ids);
|
| - vector<int> matching_regexps;
|
| - v.f.AllMatches(text, atom_ids, &matching_regexps);
|
| - EXPECT_EQ(1, matching_regexps.size());
|
| -
|
| - text = "abc12312yyyzzz";
|
| - atoms.clear();
|
| - atoms.push_back("abc");
|
| - atoms.push_back("yyy");
|
| - atoms.push_back("yyyzzz");
|
| - FindAtomIndices(v.atoms, atoms, &atom_ids);
|
| - v.f.AllMatches(text, atom_ids, &matching_regexps);
|
| - EXPECT_EQ(1, matching_regexps.size());
|
| -
|
| - text = "abcd12yyy32yyyzzz";
|
| - atoms.clear();
|
| - atoms.push_back("abc");
|
| - atoms.push_back("abcd");
|
| - atoms.push_back("yyy");
|
| - atoms.push_back("yyyzzz");
|
| - FindAtomIndices(v.atoms, atoms, &atom_ids);
|
| - LOG(INFO) << "S: " << atom_ids.size();
|
| - for (size_t i = 0; i < atom_ids.size(); i++)
|
| - LOG(INFO) << "i: " << i << " : " << atom_ids[i];
|
| - v.f.AllMatches(text, atom_ids, &matching_regexps);
|
| - EXPECT_EQ(2, matching_regexps.size());
|
| -}
|
| -
|
| -} // namespace re2
|
|
|