Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(234)

Unified Diff: third_party/re2/re2/dfa.cc

Issue 1530113002: Revert of Update re2 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/re2/re2/compile.cc ('k') | third_party/re2/re2/filtered_re2.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/re2/re2/dfa.cc
diff --git a/third_party/re2/re2/dfa.cc b/third_party/re2/re2/dfa.cc
index 1f54b9f942eb4804cc89a9ce3e6dadb75c3caf3f..f1fc7b0caf1b57570b2a81f1cfd6ee6b766bd36b 100644
--- a/third_party/re2/re2/dfa.cc
+++ b/third_party/re2/re2/dfa.cc
@@ -21,11 +21,13 @@
//
// See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
+#include "re2/prog.h"
+#include "re2/stringpiece.h"
#include "util/atomicops.h"
#include "util/flags.h"
#include "util/sparse_set.h"
-#include "re2/prog.h"
-#include "re2/stringpiece.h"
+
+#define NO_THREAD_SAFETY_ANALYSIS
DEFINE_bool(re2_dfa_bail_when_slow, true,
"Whether the RE2 DFA should bail out early "
@@ -94,7 +96,7 @@
// States, linked by the next_ pointers. If in state s and reading
// byte c, the next state should be s->next_[c].
struct State {
- inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
+ inline bool IsMatch() const { return flag_ & kFlagMatch; }
void SaveMatch(vector<int>* v);
int* inst_; // Instruction pointers in the state.
@@ -143,7 +145,7 @@
if (sizeof(size_t) == sizeof(uint32))
return Hash32StringWithSeed(s, len, a->flag_);
else
- return static_cast<size_t>(Hash64StringWithSeed(s, len, a->flag_));
+ return Hash64StringWithSeed(s, len, a->flag_);
}
#ifdef STL_MSVC
// Less than operator.
@@ -228,8 +230,9 @@
// sets *ismatch to true.
// L >= mutex_
void RunWorkqOnByte(Workq* q, Workq* nq,
- int c, uint flag, bool* ismatch,
- Prog::MatchKind kind);
+ int c, uint flag, bool* ismatch,
+ Prog::MatchKind kind,
+ int new_byte_loop);
// Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
// L >= mutex_
@@ -274,7 +277,7 @@
vector<int>* matches;
private:
- DISALLOW_COPY_AND_ASSIGN(SearchParams);
+ DISALLOW_EVIL_CONSTRUCTORS(SearchParams);
};
// Before each search, the parameters to Search are analyzed by
@@ -339,6 +342,7 @@
// Constant after initialization.
Prog* prog_; // The regular expression program to run.
Prog::MatchKind kind_; // The kind of DFA.
+ int start_unanchored_; // start of unanchored program
bool init_failed_; // initialization failed (out of memory)
Mutex mutex_; // mutex_ >= cache_mutex_.r
@@ -426,7 +430,7 @@
int maxmark_; // maximum number of marks
int nextmark_; // id of next mark
bool last_was_mark_; // last inserted was mark
- DISALLOW_COPY_AND_ASSIGN(Workq);
+ DISALLOW_EVIL_CONSTRUCTORS(Workq);
};
DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem)
@@ -441,8 +445,11 @@
if (DebugDFA)
fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
int nmark = 0;
- if (kind_ == Prog::kLongestMatch)
+ start_unanchored_ = 0;
+ if (kind_ == Prog::kLongestMatch) {
nmark = prog->size();
+ start_unanchored_ = prog->start_unanchored();
+ }
nastack_ = 2 * prog->size() + nmark;
// Account for space needed for DFA, q0, q1, astack.
@@ -451,7 +458,7 @@
(sizeof(int)+sizeof(int)) * 2; // q0, q1
mem_budget_ -= nastack_ * sizeof(int); // astack
if (mem_budget_ < 0) {
- LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld",
+ LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
prog_->size(), max_mem);
init_failed_ = true;
return;
@@ -466,7 +473,7 @@
int64 one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
(prog_->bytemap_range()+1)*sizeof(State*);
if (state_budget_ < 20*one_state) {
- LOG(INFO) << StringPrintf("DFA out of memory: prog size %d mem %lld",
+ LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
prog_->size(), max_mem);
init_failed_ = true;
return;
@@ -782,7 +789,7 @@
it != state_cache_.end(); ++it)
v.push_back(*it);
state_cache_.clear();
- for (size_t i = 0; i < v.size(); i++)
+ for (int i = 0; i < v.size(); i++)
delete[] reinterpret_cast<const char*>(v[i]);
}
@@ -864,10 +871,8 @@
break;
case kInstEmptyWidth:
- // Continue on if we have all the right flag bits.
- if (ip->empty() & ~flag)
- break;
- stk[nstk++] = ip->out();
+ if ((ip->empty() & flag) == ip->empty())
+ stk[nstk++] = ip->out();
break;
}
}
@@ -905,7 +910,8 @@
// regular expression program has been reached (the regexp has matched).
void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
int c, uint flag, bool* ismatch,
- Prog::MatchKind kind) {
+ Prog::MatchKind kind,
+ int new_byte_loop) {
if (DEBUG_MODE)
mutex_.AssertHeld();
@@ -984,8 +990,9 @@
}
// If someone else already computed this, return it.
- State* ns;
- ATOMIC_LOAD_CONSUME(ns, &state->next_[ByteMap(c)]);
+ MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
+ State* ns = state->next_[ByteMap(c)];
+ ANNOTATE_HAPPENS_AFTER(ns);
if (ns != NULL)
return ns;
@@ -1015,8 +1022,8 @@
// The state flag kFlagLastWord says whether the last
// byte processed was a word character. Use that info to
// insert empty-width (non-)word boundaries.
- bool islastword = (state->flag_ & kFlagLastWord) != 0;
- bool isword = (c != kByteEndText && Prog::IsWordChar(static_cast<uint8>(c)));
+ bool islastword = state->flag_ & kFlagLastWord;
+ bool isword = (c != kByteEndText && Prog::IsWordChar(c));
if (isword == islastword)
beforeflag |= kEmptyNonWordBoundary;
else
@@ -1029,8 +1036,8 @@
swap(q0_, q1_);
}
bool ismatch = false;
- RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_);
-
+ RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_, start_unanchored_);
+
// Most of the time, we build the state from the output of
// RunWorkqOnByte, so swap q0_ and q1_ here. However, so that
// RE2::Set can tell exactly which match instructions
@@ -1051,11 +1058,18 @@
ns = WorkqToCachedState(q0_, flag);
- // Flush ns before linking to it.
// Write barrier before updating state->next_ so that the
// main search loop can proceed without any locking, for speed.
// (Otherwise it would need one mutex operation per input byte.)
- ATOMIC_STORE_RELEASE(&state->next_[ByteMap(c)], ns);
+ // The annotations below tell race detectors that:
+ // a) the access to next_ should be ignored,
+ // b) 'ns' is properly published.
+ WriteMemoryBarrier(); // Flush ns before linking to it.
+
+ ANNOTATE_IGNORE_WRITES_BEGIN();
+ ANNOTATE_HAPPENS_BEFORE(ns);
+ state->next_[ByteMap(c)] = ns;
+ ANNOTATE_IGNORE_WRITES_END();
return ns;
}
@@ -1098,7 +1112,7 @@
Mutex* mu_;
bool writing_;
- DISALLOW_COPY_AND_ASSIGN(RWLocker);
+ DISALLOW_EVIL_CONSTRUCTORS(RWLocker);
};
DFA::RWLocker::RWLocker(Mutex* mu)
@@ -1198,7 +1212,7 @@
bool is_special_; // whether original state was special
State* special_; // if is_special_, the original state
- DISALLOW_COPY_AND_ASSIGN(StateSaver);
+ DISALLOW_EVIL_CONSTRUCTORS(StateSaver);
};
DFA::StateSaver::StateSaver(DFA* dfa, State* state) {
@@ -1376,8 +1390,9 @@
// Okay to use bytemap[] not ByteMap() here, because
// c is known to be an actual byte and not kByteEndText.
- State* ns;
- ATOMIC_LOAD_CONSUME(ns, &s->next_[bytemap[c]]);
+ MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
+ State* ns = s->next_[bytemap[c]];
+ ANNOTATE_HAPPENS_AFTER(ns);
if (ns == NULL) {
ns = RunStateOnByteUnlocked(s, c);
if (ns == NULL) {
@@ -1390,7 +1405,7 @@
// of 10 bytes per state computation, fail so that RE2 can
// fall back to the NFA.
if (FLAGS_re2_dfa_bail_when_slow && resetp != NULL &&
- static_cast<unsigned long>(p - resetp) < 10*state_cache_.size()) {
+ (p - resetp) < 10*state_cache_.size()) {
params->failed = true;
return false;
}
@@ -1464,8 +1479,9 @@
lastbyte = params->text.begin()[-1] & 0xFF;
}
- State* ns;
- ATOMIC_LOAD_CONSUME(ns, &s->next_[ByteMap(lastbyte)]);
+ MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering
+ State* ns = s->next_[ByteMap(lastbyte)];
+ ANNOTATE_HAPPENS_AFTER(ns);
if (ns == NULL) {
ns = RunStateOnByteUnlocked(s, lastbyte);
if (ns == NULL) {
@@ -1653,16 +1669,13 @@
}
}
- if (DebugDFA) {
- int fb;
- ATOMIC_LOAD_RELAXED(fb, &info->firstbyte);
+ if (DebugDFA)
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n",
params->anchored, params->run_forward, flags,
- DumpState(info->start).c_str(), fb);
- }
+ DumpState(info->start).c_str(), info->firstbyte);
params->start = info->start;
- ATOMIC_LOAD_ACQUIRE(params->firstbyte, &info->firstbyte);
+ params->firstbyte = ANNOTATE_UNPROTECTED_READ(info->firstbyte);
return true;
}
@@ -1670,15 +1683,17 @@
// Fills in info if needed. Returns true on success, false on failure.
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
uint flags) {
- // Quick check.
- int fb;
- ATOMIC_LOAD_ACQUIRE(fb, &info->firstbyte);
- if (fb != kFbUnknown)
+ // Quick check; okay because of memory barriers below.
+ if (ANNOTATE_UNPROTECTED_READ(info->firstbyte) != kFbUnknown) {
+ ANNOTATE_HAPPENS_AFTER(&info->firstbyte);
return true;
+ }
MutexLock l(&mutex_);
- if (info->firstbyte != kFbUnknown)
+ if (info->firstbyte != kFbUnknown) {
+ ANNOTATE_HAPPENS_AFTER(&info->firstbyte);
return true;
+ }
q0_->clear();
AddToQueue(q0_,
@@ -1689,14 +1704,16 @@
return false;
if (info->start == DeadState) {
- // Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone);
+ ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
+ WriteMemoryBarrier(); // Synchronize with "quick check" above.
+ info->firstbyte = kFbNone;
return true;
}
if (info->start == FullMatchState) {
- // Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone); // will be ignored
+ ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
+ WriteMemoryBarrier(); // Synchronize with "quick check" above.
+ info->firstbyte = kFbNone; // will be ignored
return true;
}
@@ -1707,8 +1724,9 @@
for (int i = 0; i < 256; i++) {
State* s = RunStateOnByte(info->start, i);
if (s == NULL) {
- // Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
+ ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
+ WriteMemoryBarrier(); // Synchronize with "quick check" above.
+ info->firstbyte = firstbyte;
return false;
}
if (s == info->start)
@@ -1721,8 +1739,9 @@
break;
}
}
- // Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
+ ANNOTATE_HAPPENS_BEFORE(&info->firstbyte);
+ WriteMemoryBarrier(); // Synchronize with "quick check" above.
+ info->firstbyte = firstbyte;
return true;
}
@@ -1802,16 +1821,19 @@
pdfa = &dfa_longest_;
}
- // Quick check.
- DFA *dfa;
- ATOMIC_LOAD_ACQUIRE(dfa, pdfa);
- if (dfa != NULL)
+ // Quick check; okay because of memory barrier below.
+ DFA *dfa = ANNOTATE_UNPROTECTED_READ(*pdfa);
+ if (dfa != NULL) {
+ ANNOTATE_HAPPENS_AFTER(dfa);
return dfa;
+ }
MutexLock l(&dfa_mutex_);
dfa = *pdfa;
- if (dfa != NULL)
+ if (dfa != NULL) {
+ ANNOTATE_HAPPENS_AFTER(dfa);
return dfa;
+ }
// For a forward DFA, half the memory goes to each DFA.
// For a reverse DFA, all the memory goes to the
@@ -1828,7 +1850,9 @@
delete_dfa_ = DeleteDFA;
// Synchronize with "quick check" above.
- ATOMIC_STORE_RELEASE(pdfa, dfa);
+ ANNOTATE_HAPPENS_BEFORE(dfa);
+ WriteMemoryBarrier();
+ *pdfa = dfa;
return dfa;
}
@@ -1901,9 +1925,9 @@
// as the beginning.
if (match0) {
if (reversed_)
- match0->set(ep, static_cast<int>(text.end() - ep));
+ *match0 = StringPiece(ep, text.end() - ep);
else
- match0->set(text.begin(), static_cast<int>(ep - text.begin()));
+ *match0 = StringPiece(text.begin(), ep - text.begin());
}
return true;
}
@@ -1928,7 +1952,7 @@
q.push_back(params.start);
// Flood to expand every state.
- for (size_t i = 0; i < q.size(); i++) {
+ for (int i = 0; i < q.size(); i++) {
State* s = q[i];
for (int c = 0; c < 257; c++) {
State* ns = RunStateOnByteUnlocked(s, c);
@@ -1939,7 +1963,7 @@
}
}
- return static_cast<int>(q.size());
+ return q.size();
}
// Build out all states in DFA for kind. Returns number of states.
@@ -2011,7 +2035,6 @@
// Build minimum prefix.
State* s = params.start;
min->clear();
- MutexLock lock(&mutex_);
for (int i = 0; i < maxlen; i++) {
if (previously_visited_states[s] > kMaxEltRepetitions) {
VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions
@@ -2021,7 +2044,7 @@
previously_visited_states[s]++;
// Stop if min is a match.
- State* ns = RunStateOnByte(s, kByteEndText);
+ State* ns = RunStateOnByteUnlocked(s, kByteEndText);
if (ns == NULL) // DFA out of memory
return false;
if (ns != DeadState && (ns == FullMatchState || ns->IsMatch()))
@@ -2030,13 +2053,13 @@
// Try to extend the string with low bytes.
bool extended = false;
for (int j = 0; j < 256; j++) {
- ns = RunStateOnByte(s, j);
+ ns = RunStateOnByteUnlocked(s, j);
if (ns == NULL) // DFA out of memory
return false;
if (ns == FullMatchState ||
(ns > SpecialStateMax && ns->ninst_ > 0)) {
extended = true;
- min->append(1, static_cast<char>(j));
+ min->append(1, j);
s = ns;
break;
}
@@ -2060,13 +2083,13 @@
// Try to extend the string with high bytes.
bool extended = false;
for (int j = 255; j >= 0; j--) {
- State* ns = RunStateOnByte(s, j);
+ State* ns = RunStateOnByteUnlocked(s, j);
if (ns == NULL)
return false;
if (ns == FullMatchState ||
(ns > SpecialStateMax && ns->ninst_ > 0)) {
extended = true;
- max->append(1, static_cast<char>(j));
+ max->append(1, j);
s = ns;
break;
}
@@ -2099,12 +2122,11 @@
MutexLock l(&dfa_mutex_);
// Have to use dfa_longest_ to get all strings for full matches.
// For example, (a|aa) never matches aa in first-match mode.
+ if (dfa_longest_ == NULL) {
+ dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
+ delete_dfa_ = DeleteDFA;
+ }
dfa = dfa_longest_;
- if (dfa == NULL) {
- dfa = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
- ATOMIC_STORE_RELEASE(&dfa_longest_, dfa);
- delete_dfa_ = DeleteDFA;
- }
}
return dfa->PossibleMatchRange(min, max, maxlen);
}
« no previous file with comments | « third_party/re2/re2/compile.cc ('k') | third_party/re2/re2/filtered_re2.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698