Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(65)

Unified Diff: third_party/re2/re2/testing/dfa_test.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/testing/compile_test.cc ('k') | third_party/re2/re2/testing/dump.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/testing/dfa_test.cc
diff --git a/third_party/re2/re2/testing/dfa_test.cc b/third_party/re2/re2/testing/dfa_test.cc
deleted file mode 100644
index e9c7befd6906d871bff780775317c8debe1a22a8..0000000000000000000000000000000000000000
--- a/third_party/re2/re2/testing/dfa_test.cc
+++ /dev/null
@@ -1,348 +0,0 @@
-// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "util/thread.h"
-#include "util/test.h"
-#include "re2/prog.h"
-#include "re2/re2.h"
-#include "re2/regexp.h"
-#include "re2/testing/regexp_generator.h"
-#include "re2/testing/string_generator.h"
-
-static const bool UsingMallocCounter = false;
-
-DECLARE_bool(re2_dfa_bail_when_slow);
-
-DEFINE_int32(size, 8, "log2(number of DFA nodes)");
-DEFINE_int32(repeat, 2, "Repetition count.");
-DEFINE_int32(threads, 4, "number of threads");
-
-namespace re2 {
-
-// Check that multithreaded access to DFA class works.
-
-// Helper thread: builds entire DFA for prog.
-class BuildThread : public Thread {
- public:
- BuildThread(Prog* prog) : prog_(prog) {}
- virtual void Run() {
- CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch));
- }
-
- private:
- Prog* prog_;
-};
-
-TEST(Multithreaded, BuildEntireDFA) {
- // Create regexp with 2^FLAGS_size states in DFA.
- string s = "a";
- for (int i = 0; i < FLAGS_size; i++)
- s += "[ab]";
- s += "b";
-
- // Check that single-threaded code works.
- {
- //LOG(INFO) << s;
- Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
- CHECK(re);
- Prog* prog = re->CompileToProg(0);
- CHECK(prog);
- BuildThread* t = new BuildThread(prog);
- t->SetJoinable(true);
- t->Start();
- t->Join();
- delete t;
- delete prog;
- re->Decref();
- }
-
- // Build the DFA simultaneously in a bunch of threads.
- for (int i = 0; i < FLAGS_repeat; i++) {
- Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
- CHECK(re);
- Prog* prog = re->CompileToProg(0);
- CHECK(prog);
-
- vector<BuildThread*> threads;
- for (int j = 0; j < FLAGS_threads; j++) {
- BuildThread *t = new BuildThread(prog);
- t->SetJoinable(true);
- threads.push_back(t);
- }
- for (int j = 0; j < FLAGS_threads; j++)
- threads[j]->Start();
- for (int j = 0; j < FLAGS_threads; j++) {
- threads[j]->Join();
- delete threads[j];
- }
-
- // One more compile, to make sure everything is okay.
- prog->BuildEntireDFA(Prog::kFirstMatch);
- delete prog;
- re->Decref();
- }
-}
-
-// Check that DFA size requirements are followed.
-// BuildEntireDFA will, like SearchDFA, stop building out
-// the DFA once the memory limits are reached.
-TEST(SingleThreaded, BuildEntireDFA) {
- // Create regexp with 2^30 states in DFA.
- string s = "a";
- for (int i = 0; i < 30; i++)
- s += "[ab]";
- s += "b";
-
- Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
- CHECK(re);
- int max = 24;
- for (int i = 17; i < max; i++) {
- int64 limit = 1<<i;
- int64 usage;
- //int64 progusage, dfamem;
- {
- testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
- Prog* prog = re->CompileToProg(limit);
- CHECK(prog);
- //progusage = m.HeapGrowth();
- //dfamem = prog->dfa_mem();
- prog->BuildEntireDFA(Prog::kFirstMatch);
- prog->BuildEntireDFA(Prog::kLongestMatch);
- usage = m.HeapGrowth();
- delete prog;
- }
- if (!UsingMallocCounter)
- continue;
- //LOG(INFO) << "limit " << limit << ", "
- // << "prog usage " << progusage << ", "
- // << "DFA budget " << dfamem << ", "
- // << "total " << usage;
- // Tolerate +/- 10%.
- CHECK_GT(usage, limit*9/10);
- CHECK_LT(usage, limit*11/10);
- }
- re->Decref();
-}
-
-// Generates and returns a string over binary alphabet {0,1} that contains
-// all possible binary sequences of length n as subsequences. The obvious
-// brute force method would generate a string of length n * 2^n, but this
-// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
-// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
-// Such a string is useful for testing a DFA. If you have a DFA
-// where distinct last n bytes implies distinct states, then running on a
-// DeBruijn string causes the DFA to need to create a new state at every
-// position in the input, never reusing any states until it gets to the
-// end of the string. This is the worst possible case for DFA execution.
-static string DeBruijnString(int n) {
- CHECK_LT(n, static_cast<int>(8*sizeof(int)));
- CHECK_GT(n, 0);
-
- vector<bool> did(1<<n);
- for (int i = 0; i < 1<<n; i++)
- did[i] = false;
-
- string s;
- for (int i = 0; i < n-1; i++)
- s.append("0");
- int bits = 0;
- int mask = (1<<n) - 1;
- for (int i = 0; i < (1<<n); i++) {
- bits <<= 1;
- bits &= mask;
- if (!did[bits|1]) {
- bits |= 1;
- s.append("1");
- } else {
- s.append("0");
- }
- CHECK(!did[bits]);
- did[bits] = true;
- }
- return s;
-}
-
-// Test that the DFA gets the right result even if it runs
-// out of memory during a search. The regular expression
-// 0[01]{n}$ matches a binary string of 0s and 1s only if
-// the (n+1)th-to-last character is a 0. Matching this in
-// a single forward pass (as done by the DFA) requires
-// keeping one bit for each of the last n+1 characters
-// (whether each was a 0), or 2^(n+1) possible states.
-// If we run this regexp to search in a string that contains
-// every possible n-character binary string as a substring,
-// then it will have to run through at least 2^n states.
-// States are big data structures -- certainly more than 1 byte --
-// so if the DFA can search correctly while staying within a
-// 2^n byte limit, it must be handling out-of-memory conditions
-// gracefully.
-TEST(SingleThreaded, SearchDFA) {
- // Choice of n is mostly arbitrary, except that:
- // * making n too big makes the test run for too long.
- // * making n too small makes the DFA refuse to run,
- // because it has so little memory compared to the program size.
- // Empirically, n = 18 is a good compromise between the two.
- const int n = 18;
-
- Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
- Regexp::LikePerl, NULL);
- CHECK(re);
-
- // The De Bruijn string for n ends with a 1 followed by n 0s in a row,
- // which is not a match for 0[01]{n}$. Adding one more 0 is a match.
- string no_match = DeBruijnString(n);
- string match = no_match + "0";
-
- // The De Bruijn string is the worst case input for this regexp.
- // By default, the DFA will notice that it is flushing its cache
- // too frequently and will bail out early, so that RE2 can use the
- // NFA implementation instead. (The DFA loses its speed advantage
- // if it can't get a good cache hit rate.)
- // Tell the DFA to trudge along instead.
- FLAGS_re2_dfa_bail_when_slow = false;
-
- int64 usage;
- int64 peak_usage;
- {
- testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
- Prog* prog = re->CompileToProg(1<<n);
- CHECK(prog);
- for (int i = 0; i < 10; i++) {
- bool matched, failed = false;
- matched = prog->SearchDFA(match, NULL,
- Prog::kUnanchored, Prog::kFirstMatch,
- NULL, &failed, NULL);
- CHECK(!failed);
- CHECK(matched);
- matched = prog->SearchDFA(no_match, NULL,
- Prog::kUnanchored, Prog::kFirstMatch,
- NULL, &failed, NULL);
- CHECK(!failed);
- CHECK(!matched);
- }
- usage = m.HeapGrowth();
- peak_usage = m.PeakHeapGrowth();
- delete prog;
- }
- if (!UsingMallocCounter)
- return;
- //LOG(INFO) << "usage " << usage << ", "
- // << "peak usage " << peak_usage;
- CHECK_LT(usage, 1<<n);
- CHECK_LT(peak_usage, 1<<n);
- re->Decref();
-}
-
-// Helper thread: searches for match, which should match,
-// and no_match, which should not.
-class SearchThread : public Thread {
- public:
- SearchThread(Prog* prog, const StringPiece& match,
- const StringPiece& no_match)
- : prog_(prog), match_(match), no_match_(no_match) {}
-
- virtual void Run() {
- for (int i = 0; i < 2; i++) {
- bool matched, failed = false;
- matched = prog_->SearchDFA(match_, NULL,
- Prog::kUnanchored, Prog::kFirstMatch,
- NULL, &failed, NULL);
- CHECK(!failed);
- CHECK(matched);
- matched = prog_->SearchDFA(no_match_, NULL,
- Prog::kUnanchored, Prog::kFirstMatch,
- NULL, &failed, NULL);
- CHECK(!failed);
- CHECK(!matched);
- }
- }
-
- private:
- Prog* prog_;
- StringPiece match_;
- StringPiece no_match_;
-};
-
-TEST(Multithreaded, SearchDFA) {
- // Same as single-threaded test above.
- const int n = 18;
- Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
- Regexp::LikePerl, NULL);
- CHECK(re);
- string no_match = DeBruijnString(n);
- string match = no_match + "0";
- FLAGS_re2_dfa_bail_when_slow = false;
-
- // Check that single-threaded code works.
- {
- Prog* prog = re->CompileToProg(1<<n);
- CHECK(prog);
- SearchThread* t = new SearchThread(prog, match, no_match);
- t->SetJoinable(true);
- t->Start();
- t->Join();
- delete t;
- delete prog;
- }
-
- // Run the search simultaneously in a bunch of threads.
- // Reuse same flags for Multithreaded.BuildDFA above.
- for (int i = 0; i < FLAGS_repeat; i++) {
- //LOG(INFO) << "Search " << i;
- Prog* prog = re->CompileToProg(1<<n);
- CHECK(prog);
-
- vector<SearchThread*> threads;
- for (int j = 0; j < FLAGS_threads; j++) {
- SearchThread *t = new SearchThread(prog, match, no_match);
- t->SetJoinable(true);
- threads.push_back(t);
- }
- for (int j = 0; j < FLAGS_threads; j++)
- threads[j]->Start();
- for (int j = 0; j < FLAGS_threads; j++) {
- threads[j]->Join();
- delete threads[j];
- }
- delete prog;
- }
- re->Decref();
-}
-
-struct ReverseTest {
- const char *regexp;
- const char *text;
- bool match;
-};
-
-// Test that reverse DFA handles anchored/unanchored correctly.
-// It's in the DFA interface but not used by RE2.
-ReverseTest reverse_tests[] = {
- { "\\A(a|b)", "abc", true },
- { "(a|b)\\z", "cba", true },
- { "\\A(a|b)", "cba", false },
- { "(a|b)\\z", "abc", false },
-};
-
-TEST(DFA, ReverseMatch) {
- int nfail = 0;
- for (int i = 0; i < arraysize(reverse_tests); i++) {
- const ReverseTest& t = reverse_tests[i];
- Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
- CHECK(re);
- Prog *prog = re->CompileToReverseProg(0);
- CHECK(prog);
- bool failed = false;
- bool matched = prog->SearchDFA(t.text, NULL, Prog::kUnanchored, Prog::kFirstMatch, NULL, &failed, NULL);
- if (matched != t.match) {
- LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
- nfail++;
- }
- delete prog;
- re->Decref();
- }
- EXPECT_EQ(nfail, 0);
-}
-
-} // namespace re2
« no previous file with comments | « third_party/re2/re2/testing/compile_test.cc ('k') | third_party/re2/re2/testing/dump.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698